+
+
+
+
+
+ chrom_1
+ start_1
+ end_1
+ chrom_2
+ start_2
+ end_2
+ region_chrom
+ region_start
+ region_end
+ region_id
+
+
+
+
+ 0
+ chr1
+ 100
+ 200
+ chr1
+ 1000
+ 2000
+ chr1
+ 100
+ 400
+ 0
+
+
+ 1
+ chr1
+ 2000
+ 3000
+ chr1
+ 200
+ 300
+ chr1
+ 100
+ 400
+ 0
+
+
+ 2
+ chr1
+ 3000
+ 4000
+ chr1
+ 300
+ 400
+ chr1
+ 100
+ 400
+ 0
+
+
+
+
+
+
+
+We can also restrict the positions to filter on, by passing different anchor parameters. For example, we can filter for contacts, where the first position overlaps with our target:
+
+
+```python
+query_steps = [
+ Overlap(target_region, anchor_mode=Anchor(mode="ANY", anchors=[1]))
+]
+Query(query_steps=query_steps)\
+ .build(contacts)\
+ .compute()\
+ .filter(regex=r"chrom|start|end|id")
+```
+
+
+
+
+
+
+
+
+
+
+ chrom_1
+ start_1
+ end_1
+ chrom_2
+ start_2
+ end_2
+ region_chrom
+ region_start
+ region_end
+ region_id
+
+
+
+
+ 0
+ chr1
+ 100
+ 200
+ chr1
+ 1000
+ 2000
+ chr1
+ 100
+ 400
+ 0
+
+
+
+
+
+
+
+This time, only the first contact overlaps.
+
+The same functionality is implemented also for the Pixels class
+
+## Selecting a subset of contacts at multiple genomic regions
+The Overlap class is also capable of selecting contacts at multiple genomic regions. Here, the behavior of `Overlap` deviates from a simple filter, because if a given contact overlaps with multiple regions, it will be returned multiple times.
+
+Specify target regions
+
+
+```python
+target_regions = pd.DataFrame({
+ "chrom": ['chr1', 'chr1'],
+ "start": [100, 150],
+ "end": [400, 200],
+})
+```
+
+
+```python
+query_steps = [
+ Overlap(target_regions, anchor_mode=Anchor(mode="ANY", anchors=[1]))
+]
+Query(query_steps=query_steps)\
+ .build(contacts)\
+ .compute()\
+ .filter(regex=r"chrom|start|end|id")
+```
+
+
+
+
+
+
+
+
+
+
+ chrom_1
+ start_1
+ end_1
+ chrom_2
+ start_2
+ end_2
+ region_chrom
+ region_start
+ region_end
+ region_id
+
+
+
+
+ 0
+ chr1
+ 100
+ 200
+ chr1
+ 1000
+ 2000
+ chr1
+ 100
+ 400
+ 0
+
+
+ 1
+ chr1
+ 100
+ 200
+ chr1
+ 1000
+ 2000
+ chr1
+ 150
+ 200
+ 1
+
+
+
+
+
+
+
+In this example, the contact overlapping both regions is duplicated.
+
+The same functionality is implemented also for the pixels class.
+
+## Calculating the distance to a target region and aggregating the result
+In this example, we calculate the distance of pixels to target regions and aggregate based on the distances. This is a very common use case in so-called pileup analyses, where we want to investigate the average behavior around regions of interest.
+
+
+```python
+from spoc.pixels import Pixels
+from spoc.query_engine import DistanceTransformation, DistanceMode
+import pandas as pd
+import numpy as np
+from itertools import product
+```
+
+First we define a set of target pixels
+
+
+```python
+def complete_synthetic_pixels():
+ """Pixels that span two regions densely"""
+ np.random.seed(42)
+ # genomic region_1
+ pixels_1 = [
+ {
+ "chrom": tup[0],
+ "start_1": tup[1],
+ "start_2": tup[2],
+ "start_3": tup[3],
+ "count": np.random.randint(0, 10),
+ }
+ for tup in product(
+ ["chr1"],
+ np.arange(900_000, 1_150_000, 50_000),
+ np.arange(900_000, 1_150_000, 50_000),
+ np.arange(900_000, 1_150_000, 50_000),
+ )
+ ]
+ # genomic region_2
+ pixels_2 = [
+ {
+ "chrom": tup[0],
+ "start_1": tup[1],
+ "start_2": tup[2],
+ "start_3": tup[3],
+ "count": np.random.randint(0, 10),
+ }
+ for tup in product(
+ ["chr2"],
+ np.arange(900_000, 1_150_000, 50_000),
+ np.arange(900_000, 1_150_000, 50_000),
+ np.arange(900_000, 1_150_000, 50_000),
+ )
+ ]
+ return pd.concat((pd.DataFrame(pixels_1), pd.DataFrame(pixels_2)))
+```
+
+
+```python
+pixels = Pixels(complete_synthetic_pixels(), number_fragments=3, binsize=50_000)
+```
+
+Then we define the target regions we are interested in.
+
+
+```python
+target_regions = pd.DataFrame(
+ {
+ "chrom": ["chr1", "chr2"],
+ "start": [900_000, 900_000],
+ "end": [1_100_000, 1_100_000],
+ }
+ )
+```
+
+We are then interested in selecting all contacts that are contained within these pixels and then calculate the distance to them. The selection step can be done with the `Overlap` class that we described above. The distance transformation can be done with the `DistanceTransformation` query step. This query step takes an instance of genomic data that contains regions (as defined by its schema) and calculates the distance to all position columns. All distances are calculated with regard to the center of each assigned region. Since genomic positions are defined by a start and end, the `DistanceTransformation` query step has a `DistanceMode` parameter that defines whether we would like to calculate the distance with regard to the start of a genomic position, the end or its center.
+
+
+```python
+query_steps = [
+ Overlap(target_regions, anchor_mode=Anchor(mode="ANY")),
+ DistanceTransformation(
+ distance_mode=DistanceMode.LEFT,
+ ),
+]
+```
+
+We can then execute this query plan using the Query class. This will add a distance column to the genomic dataset returned.
+
+
+```python
+Query(query_steps=query_steps)\
+ .build(pixels)\
+ .compute()\
+ .filter(regex=r"chrom|distance")
+```
+
+
+
+
+
+
+
+
+
+
+ chrom_1
+ chrom_2
+ chrom_3
+ region_chrom
+ distance_1
+ distance_2
+ distance_3
+
+
+
+
+ 0
+ chr1
+ chr1
+ chr1
+ chr1
+ -100000.0
+ -100000.0
+ -100000.0
+
+
+ 1
+ chr1
+ chr1
+ chr1
+ chr1
+ -100000.0
+ -100000.0
+ -50000.0
+
+
+ 2
+ chr1
+ chr1
+ chr1
+ chr1
+ -100000.0
+ -100000.0
+ 0.0
+
+
+ 3
+ chr1
+ chr1
+ chr1
+ chr1
+ -100000.0
+ -100000.0
+ 50000.0
+
+
+ 4
+ chr1
+ chr1
+ chr1
+ chr1
+ -100000.0
+ -100000.0
+ 100000.0
+
+
+ ...
+ ...
+ ...
+ ...
+ ...
+ ...
+ ...
+ ...
+
+
+ 245
+ chr2
+ chr2
+ chr2
+ chr2
+ 100000.0
+ 100000.0
+ -100000.0
+
+
+ 246
+ chr2
+ chr2
+ chr2
+ chr2
+ 100000.0
+ 100000.0
+ -50000.0
+
+
+ 247
+ chr2
+ chr2
+ chr2
+ chr2
+ 100000.0
+ 100000.0
+ 0.0
+
+
+ 248
+ chr2
+ chr2
+ chr2
+ chr2
+ 100000.0
+ 100000.0
+ 50000.0
+
+
+ 249
+ chr2
+ chr2
+ chr2
+ chr2
+ 100000.0
+ 100000.0
+ 100000.0
+
+
+
+
250 rows × 7 columns
+
+
+
+
+## Aggregating genomic data based on its distance to a target region
+In this example, we extend the above use-case to aggregate the results based on the distance columns added. This is a common use-case to calculate aggregate statistics for different distance levels. To achieve this, we employ the same query plan as above and extend it using the `DistanceAggregation` query step.
+
+
+```python
+from spoc.query_engine import DistanceAggregation, AggregationFunction
+```
+
+The `DistanceAggregation` class requires the following parameters:
+- `value_columns`: This specifies the value to aggregate
+- `function`: The aggregation function to use. This is the enumerated type `AggregationFunction`
+- `densify_output`: Whether missing distance values should be filled with empty values (specific empty value depends on the aggregation function)
+
+Note that there are two different average functions available, `AVG` and `AVG_WITH_EMPTY`. `AVG` performs an average over all available columns, whereas `AVG_WITH_EMPTY` counts missing distances per region as 0.
+
+
+```python
+query_steps = [
+ Overlap(target_regions, anchor_mode=Anchor(mode="ALL")),
+ DistanceTransformation(),
+ DistanceAggregation(
+ value_column='count',
+ function=AggregationFunction.AVG,
+ ),
+]
+```
+
+
+```python
+Query(query_steps=query_steps)\
+ .build(pixels)\
+ .compute()
+```
+
+
+
+
+
+
+
+
+
+
+ distance_1
+ distance_2
+ distance_3
+ count
+
+
+
+
+ 0
+ -100000.0
+ -100000.0
+ -100000.0
+ 4.5
+
+
+ 1
+ -100000.0
+ -100000.0
+ -50000.0
+ 3.0
+
+
+ 2
+ -100000.0
+ -100000.0
+ 0.0
+ 5.5
+
+
+ 3
+ -100000.0
+ -100000.0
+ 50000.0
+ 5.0
+
+
+ 4
+ -100000.0
+ -100000.0
+ 100000.0
+ 6.0
+
+
+ ...
+ ...
+ ...
+ ...
+ ...
+
+
+ 120
+ 100000.0
+ 100000.0
+ -100000.0
+ 8.0
+
+
+ 121
+ 100000.0
+ 100000.0
+ -50000.0
+ 4.5
+
+
+ 122
+ 100000.0
+ 100000.0
+ 0.0
+ 4.5
+
+
+ 123
+ 100000.0
+ 100000.0
+ 50000.0
+ 4.5
+
+
+ 124
+ 100000.0
+ 100000.0
+ 100000.0
+ 0.0
+
+
+
+
125 rows × 4 columns
+
+
+
+
+In addition, we can also aggregate on a subset of distance positions, using the `position_list` parameter:
+
+
+```python
+query_steps = [
+ Overlap(target_regions, anchor_mode=Anchor(mode="ALL")),
+ DistanceTransformation(),
+ DistanceAggregation(
+ value_column='count',
+ function=AggregationFunction.AVG,
+ position_list=[1,2]
+ ),
+]
+```
+
+
+```python
+Query(query_steps=query_steps)\
+ .build(pixels)\
+ .compute()
+```
+
+
+
+
+
+
+
+
+
+
+ distance_1
+ distance_2
+ count
+
+
+
+
+ 0
+ -100000.0
+ -100000.0
+ 4.8
+
+
+ 1
+ -100000.0
+ -50000.0
+ 4.5
+
+
+ 2
+ -100000.0
+ 0.0
+ 5.3
+
+
+ 3
+ -100000.0
+ 50000.0
+ 4.7
+
+
+ 4
+ -100000.0
+ 100000.0
+ 5.3
+
+
+ 5
+ -50000.0
+ -100000.0
+ 4.8
+
+
+ 6
+ -50000.0
+ -50000.0
+ 4.4
+
+
+ 7
+ -50000.0
+ 0.0
+ 4.5
+
+
+ 8
+ -50000.0
+ 50000.0
+ 5.4
+
+
+ 9
+ -50000.0
+ 100000.0
+ 3.4
+
+
+ 10
+ 0.0
+ -100000.0
+ 2.0
+
+
+ 11
+ 0.0
+ -50000.0
+ 3.5
+
+
+ 12
+ 0.0
+ 0.0
+ 4.4
+
+
+ 13
+ 0.0
+ 50000.0
+ 5.4
+
+
+ 14
+ 0.0
+ 100000.0
+ 4.3
+
+
+ 15
+ 50000.0
+ -100000.0
+ 5.3
+
+
+ 16
+ 50000.0
+ -50000.0
+ 4.7
+
+
+ 17
+ 50000.0
+ 0.0
+ 4.0
+
+
+ 18
+ 50000.0
+ 50000.0
+ 4.2
+
+
+ 19
+ 50000.0
+ 100000.0
+ 6.1
+
+
+ 20
+ 100000.0
+ -100000.0
+ 5.4
+
+
+ 21
+ 100000.0
+ -50000.0
+ 2.8
+
+
+ 22
+ 100000.0
+ 0.0
+ 3.6
+
+
+ 23
+ 100000.0
+ 50000.0
+ 5.2
+
+
+ 24
+ 100000.0
+ 100000.0
+ 4.3
+
+
+
+
+
+
diff --git a/docs/snipping.md b/docs/snipping.md
deleted file mode 100644
index b80ae61..0000000
--- a/docs/snipping.md
+++ /dev/null
@@ -1,150 +0,0 @@
-# Snipping examples
-
-## Background
-
-Snipping is the act of extracting rectangular "snippets" from a dataset containing pore-c interactions in the form of pixels. For example, we might have a file containing two-way interactions (similar to conventional Hi-C) in the form of a matrix of genomic bins that encode the interaction frequency of these pixels. We can extract the neighborhood of TAD-boundaries from this file, specifically neighborhoods centered at TAD-boundaries that stretch 1MB upstream and downstream. The result of this extraction is a 3-dimensional matrix, where the first two dimensions encode the relative genomic offset and the third dimension encodes the particular examples, here TAD-boundaries.
-
-## Average representation of snippets
-
-Snipping is often the first step of an analysis pipeline, where the extraction of snippets is followed by reducing them along the example dimension to obtain an average representation of a particular genomic neighbourhood.
-
-## Higher-order contacts and snipping
-
-If we want to extract snippets from a higher-order contact file, things get a little more complicated than for two-way contacts. Specifically, we are often interested in obtaining a two-dimensional representation of a certain high-dimensional snippet to enable visualization. To do this, we need to define different ways of snipping from a high-dimensional file. For example, we might want to know which two-dimensional contacts are associated with a set of third coordinates, like TAD-boundaries. Or we might want to know which genomic positions are associated with a two-dimensional query. To account for this, spoc implements different "snipping strategies" that encapsulate the different ways we might want to query a high-dimensional contact file.
-
-
-## Snipper class
-The snipper class is spoc's central interface to access snipping functionality. It can be used with different snipping strategies to extract snippets. The idea is that a snipper can hold different snipping strategies that can be deployed to achieve the desired result.
-
-
-## Snipping strategies and snipping values
-
-### Snipping strategies
-
-Snipping strategies are specific ways of snipping higher-order contacts. For example, the `Triplet1DSnippingStrategy` implements snipping 2D pieces from triplets based on a set of 1d-coordinates. This can be used to ask questions like:
-
-- Do sister chromatids form internal loops when they contact each other?
-
-The idea is that a specific snipping strategy encapsulates all information of extracting snippets, such as the size of the snippets, potential offsets, values, etc. The constructor of a particular snipping strategy is specific to that strategy. Still, the snipping implementation has a unified strategy to allow the composition of different strategies in a snipper class.
-
-### Snipping values
-
-Usually, the values that are extracted from a target file can be either ICCF (iteratively corrected contact frequencies) or observed-over-expected values. The latter represents the ratio of the actually observed contacts and the expected contacts for randomly chosen genomic regions.
-
-
-```python
-from spoc.snipping import Snipper
-from spoc.snipping.snipping_strategies import Triplet1DSnippingStrategy,
- SnippingValues
-
-snip_center = Triplet1DSnippingStrategy(
- bin_size=100_000,
- half_window_size=2_000_000,
- snipping_value=SnippingValues.ICCF)
-snipper = Snipper([snip_center])
-```
-
-- Here the snipper is instantiated with a single snipping strategy that snips ICCF values from the center of a set of regions
-
-
-```python
-tad_boundaries = pd.read_csv("../playground/tad_boundaries.csv")
-```
-
-- TADs as target regions are loaded
-
-
-```python
-result = snipper.snip("../playground/test.parquet", tad_boundaries, threads=40)
-```
-
-- Snipper is used to snip ICCF values around TAD-boundaries with a 1Mbp half-window size
-
-
-```python
-f, ax = plt.subplots()
-sbn.heatmap(np.log2(result[0]))
-plt.show()
-```
-
-
-
-![png](snipping_files/snipping_13_0.png)
-
-
-
-## Combining snipping strategies
-Multiple snipping strategies can be used and dispatched together.
-
-### Snip ICCF and Obs/Exp together
-
-
-```python
-snip_center_obs_exp = Triplet1DSnippingStrategy(
- bin_size=100_000,
- half_window_size=2_000_000,
- snipping_value=SnippingValues.OBSEXP)
-snipper = Snipper([snip_center, snip_center_obs_exp])
-```
-
-
-```python
-result = snipper.snip("../playground/test.parquet", tad_boundaries, threads=40)
-```
-
-
-```python
-f, ax = plt.subplots(1, 2)
-sbn.heatmap(np.log2(result[0]), ax=ax[0], square=True)
-ax[0].set_title("ICCF")x
-sbn.heatmap(np.log2(result[1]),
- ax=ax[1],
- cmap="RdBu_r",
- vmin=-0.5,
- vmax=0.5,
- square=True)
-ax[1].set_title("Obs/Exp")
-f.set_size_inches(10, 6)
-plt.show()
-```
-
-
-
-![png](snipping_files/snipping_19_0.png)
-
-
-
-Generate triplet strategies with different offsets.
-
-
-```python
-offset_strategies = [
- Triplet1DSnippingStrategy(
- bin_size=100_000,
- half_window_size=2_000_000,
- snipping_value=SnippingValues.ICCF,
- relative_offset=offset)
- for offset in np.arange(-1_000_000, 1_000_001, 500_000)
-]
-snipper = Snipper(offset_strategies)
-```
-
-
-```python
-result = snipper.snip("../playground/test.parquet", tad_boundaries, threads=40)
-```
-
-
-```python
-f, ax = plt.subplots(1, len(result))
-for index, array in enumerate(result):
- sbn.heatmap(np.log2(result[index]), ax=ax[index], square=True)
-f.set_size_inches(20, 5)
-plt.show()
-```
-
-
-
-![png](snipping_files/snipping_24_0.png)
-
-
diff --git a/mkdocs.yml b/mkdocs.yml
index e862868..2faff7f 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -17,7 +17,9 @@ nav:
- Load example dataset: load_example_dataset.md
- Transformation pipeline: transformation_pipeline.md
- Data Structures: data_structures.md
- - Snipping: snipping.md
+ - Query Engine:
+ - Usage guide: query_engine_usage.md
+ - Class interface: query_engine_interface.md
- Contributing: contributing.md
- API:
- CLI: cli.md
@@ -25,6 +27,7 @@ nav:
- Contacts: contacts.md
- Fragments: fragments.md
- Pixels: pixels.md
+ - Query Engine: query_engine.md
- Dataframe models: dataframe_models.md
- File parameters models: file_parameters_models.md
plugins:
@@ -35,6 +38,11 @@ markdown_extensions:
anchor_linenums: true
line_spans: __span
pygments_lang_class: true
+ - pymdownx.superfences:
+ custom_fences:
+ - name: mermaid
+ class: mermaid
+ format: !!python/name:pymdownx.superfences.fence_code_format
- pymdownx.inlinehilite
- pymdownx.snippets
- pymdownx.superfences
diff --git a/notebooks/data_structures.ipynb b/notebooks/data_structures.ipynb
index 05b768e..7c2ddcc 100644
--- a/notebooks/data_structures.ipynb
+++ b/notebooks/data_structures.ipynb
@@ -225,7 +225,8 @@
"metadata": {},
"outputs": [],
"source": [
- "fragments = FileManager(use_dask=True).load_fragments(\"../tests/test_files/good_porec.parquet\")"
+ "from spoc.io import DataMode\n",
+ "fragments = FileManager(DataMode.DASK).load_fragments(\"../tests/test_files/good_porec.parquet\")"
]
},
{
@@ -351,7 +352,7 @@
"metadata": {},
"outputs": [],
"source": [
- "fragments = FileManager(use_dask=True).load_fragments(\"../tests/test_files/bad_porec.parquet\")"
+ "fragments = FileManager(DataMode.DASK).load_fragments(\"../tests/test_files/bad_porec.parquet\")"
]
},
{
diff --git a/notebooks/query_engine_usage.ipynb b/notebooks/query_engine_usage.ipynb
new file mode 100644
index 0000000..e64ff4e
--- /dev/null
+++ b/notebooks/query_engine_usage.ipynb
@@ -0,0 +1,1299 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Query engine\n",
+ "This technical document describes the spoc query engine, a set of classes that implements spoc's interface for querying multi-dimensional genomic data."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Principles\n",
+ "\n",
+ "### Composable pieces\n",
+ "Spoc's query engine consists of composable pieces that can be combined to produce an expressive query language. These pieces represent basic operations on genomic data that are easily implemented and understood on their own. This allows a great degree of flexibility, while also allowing predefined recipes that less experienced users can get started with.\n",
+ "\n",
+ "### Lazy evaluation\n",
+ "The spoc query engine is designed with lazy evaluation as a guiding principle. This means that data queries are only executed when they are needed to minimize loading data into memory and computational overhead. To enable this, spoc queries have a construction phase, which specifies the operations to be executed and an execution phase, that actually executes the query."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Query plans and query steps\n",
+ "\n",
+ "The most important ingredient in this query language is a class that implements the `QueryStep` protocol. This protocol serves two purposes:\n",
+ "\n",
+ "- It exposes a way to validate the data schema during query building\n",
+ "- It implements adding itself to a query\n",
+ "\n",
+ "This way, query steps can be combined into a query plan that specifies the analysis to be executed. In general, spoc supports the following types of query steps:\n",
+ "\n",
+ "- **Overlap**: Overlaps genomic data with a set of genomic intervals\n",
+ "- **Aggregation**: Aggregates the data passed into the query using an aggregation function\n",
+ "- **Transform**: Adds columns to query based on a fixed or custom computation\n",
+ "\n",
+ "Specific examples of query steps are:\n",
+ "\n",
+ "- **DistanceTransformation**: Example of a transformation. Adds distance of genomic positions to regions added by Overlap\n",
+ "- **DistanceAggregation**: Example of an aggregation. Aggregates the distances to genomic regions using an aggregation function.\n",
+ "\n",
+ "### Input and output of query steps\n",
+ "\n",
+ "A query step takes as input a class that implements the `GenomicData` protocol. This protocol allows retrieval of the data schema (a thin wrapper over a pandera dataframe schema) as well as the data itself. The output of a query step is again a class that implements the `GenomicData` protocol to allow composition. Specific examples of possible inputs are:\n",
+ "\n",
+ "- **Pixels**: Represents input pixels\n",
+ "- **Contacts**: Represents input contacts\n",
+ "- **QueryPlan**: The result of a query step\n",
+ "\n",
+ "### Composition of query steps\n",
+ "\n",
+ "To allow specifying complex queries, query steps need to be combined. This is done using the `Query` class. It takes a query plan (a list of `QueryStep` instances) as input, exposes the `build` method, which takes input data, validates all query steps and adds them to the resulting `QueryPlan` instance that is returned.\n",
+ "\n",
+ "### Manifestation of results\n",
+ "\n",
+ "So far, we have only talked about specifying the query to be executed, but not how to actually execute it. A `QueryPlan` has a `compute()` method that returns the manifested dataframe as a `pd.DataFrame` instance. This is the step that actually executes the specified query."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Examples"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Selecting a subset of contacts at a single genomic position\n",
+ "In this example, we want to select a subset of genomic contacts at a single location. For this, we first load the required input data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from spoc.query_engine import Overlap, Anchor, Query\n",
+ "from spoc.contacts import Contacts\n",
+ "import pandas as pd\n",
+ "\n",
+ "contacts = Contacts.from_uri(\"../tests/test_files/contacts_unlabelled_2d_v2.parquet::2\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Then we specify a target region"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "target_region = pd.DataFrame({\n",
+ " \"chrom\": ['chr1'],\n",
+ " \"start\": [100],\n",
+ " \"end\": [400],\n",
+ "})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First, we want to select all contacts where any of the fragments constituting the contact overlaps the target region. To perform this action, we use the Overlap class and pass the target region as well as an instance of the `Anchor` class. The `Anchor` dataclass allows us to specify how we want to filter contacts for region overlap. It has two attributes `mode` and `anchors`. `Anchors` indicates the positions we want to filter on (default is all positions) and `mode` specifies whether we require all positions to overlap or any position to overlap. So for example, if we want all of our two-way contacts for which any of the positions overlap, we would use `Anchor(mode='ANY', anchors=[1,2])`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "query_steps = [\n",
+ " Overlap(target_region, anchor_mode=Anchor(mode=\"ANY\", anchors=[1,2]))\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A query plan is a list of query steps that can be used in the basic query class"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "query = Query(query_steps=query_steps)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `.build` method validates the query steps against the input data and returns a `QueryPlan` object"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "
"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "result = query.build(contacts)\n",
+ "result"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The resulting `QueryPlan` object can be executed using its `.compute()` method, which returns a `pd.DataFrame`. The resulting dataframe has additional columns that represent the regions, with which the input contacts overlapped."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " chrom_1 \n",
+ " start_1 \n",
+ " end_1 \n",
+ " chrom_2 \n",
+ " start_2 \n",
+ " end_2 \n",
+ " region_chrom \n",
+ " region_start \n",
+ " region_end \n",
+ " region_id \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " chr1 \n",
+ " 100 \n",
+ " 200 \n",
+ " chr1 \n",
+ " 1000 \n",
+ " 2000 \n",
+ " chr1 \n",
+ " 100 \n",
+ " 400 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " chr1 \n",
+ " 2000 \n",
+ " 3000 \n",
+ " chr1 \n",
+ " 200 \n",
+ " 300 \n",
+ " chr1 \n",
+ " 100 \n",
+ " 400 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " chr1 \n",
+ " 3000 \n",
+ " 4000 \n",
+ " chr1 \n",
+ " 300 \n",
+ " 400 \n",
+ " chr1 \n",
+ " 100 \n",
+ " 400 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " chrom_1 start_1 end_1 chrom_2 start_2 end_2 region_chrom region_start \\\n",
+ "0 chr1 100 200 chr1 1000 2000 chr1 100 \n",
+ "1 chr1 2000 3000 chr1 200 300 chr1 100 \n",
+ "2 chr1 3000 4000 chr1 300 400 chr1 100 \n",
+ "\n",
+ " region_end region_id \n",
+ "0 400 0 \n",
+ "1 400 0 \n",
+ "2 400 0 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = result.compute()\n",
+ "print(type(df))\n",
+ "df.filter(regex=r\"chrom|start|end|id\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can also restrict the positions to filter on, by passing different anchor parameters. For example, we can filter for contacts, where the first position overlaps with our target:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " chrom_1 \n",
+ " start_1 \n",
+ " end_1 \n",
+ " chrom_2 \n",
+ " start_2 \n",
+ " end_2 \n",
+ " region_chrom \n",
+ " region_start \n",
+ " region_end \n",
+ " region_id \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " chr1 \n",
+ " 100 \n",
+ " 200 \n",
+ " chr1 \n",
+ " 1000 \n",
+ " 2000 \n",
+ " chr1 \n",
+ " 100 \n",
+ " 400 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " chrom_1 start_1 end_1 chrom_2 start_2 end_2 region_chrom region_start \\\n",
+ "0 chr1 100 200 chr1 1000 2000 chr1 100 \n",
+ "\n",
+ " region_end region_id \n",
+ "0 400 0 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "query_steps = [\n",
+ " Overlap(target_region, anchor_mode=Anchor(mode=\"ANY\", anchors=[1]))\n",
+ "]\n",
+ "Query(query_steps=query_steps)\\\n",
+ " .build(contacts)\\\n",
+ " .compute()\\\n",
+ " .filter(regex=r\"chrom|start|end|id\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This time, only the first contact overlaps."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The same functionality is implemented also for the Pixels class"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Selecting a subset of contacts at multiple genomic regions\n",
+ "The Overlap class is also capable of selecting contacts at multiple genomic regions. Here, the behavior of `Overlap` deviates from a simple filter, because if a given contact overlaps with multiple regions, it will be returned multiple times."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Specify target regions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "target_regions = pd.DataFrame({\n",
+ " \"chrom\": ['chr1', 'chr1'],\n",
+ " \"start\": [100, 150],\n",
+ " \"end\": [400, 200],\n",
+ "})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " chrom_1 \n",
+ " start_1 \n",
+ " end_1 \n",
+ " chrom_2 \n",
+ " start_2 \n",
+ " end_2 \n",
+ " region_chrom \n",
+ " region_start \n",
+ " region_end \n",
+ " region_id \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " chr1 \n",
+ " 100 \n",
+ " 200 \n",
+ " chr1 \n",
+ " 1000 \n",
+ " 2000 \n",
+ " chr1 \n",
+ " 100 \n",
+ " 400 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " chr1 \n",
+ " 100 \n",
+ " 200 \n",
+ " chr1 \n",
+ " 1000 \n",
+ " 2000 \n",
+ " chr1 \n",
+ " 150 \n",
+ " 200 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " chrom_1 start_1 end_1 chrom_2 start_2 end_2 region_chrom region_start \\\n",
+ "0 chr1 100 200 chr1 1000 2000 chr1 100 \n",
+ "1 chr1 100 200 chr1 1000 2000 chr1 150 \n",
+ "\n",
+ " region_end region_id \n",
+ "0 400 0 \n",
+ "1 200 1 "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "query_steps = [\n",
+ " Overlap(target_regions, anchor_mode=Anchor(mode=\"ANY\", anchors=[1]))\n",
+ "]\n",
+ "Query(query_steps=query_steps)\\\n",
+ " .build(contacts)\\\n",
+ " .compute()\\\n",
+ " .filter(regex=r\"chrom|start|end|id\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this example, the contact overlapping both regions is duplicated."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The same functionality is implemented also for the pixels class."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Calculating the distance to a target region and aggregating the result\n",
+ "In this example, we calculate the distance of pixels to target regions and aggregate based on the distances. This is a very common use case in so-called pileup analyses, where we want to investigate the average behavior around regions of interest."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from spoc.pixels import Pixels\n",
+ "from spoc.query_engine import DistanceTransformation, DistanceMode\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from itertools import product"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First we define a set of target pixels"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def complete_synthetic_pixels():\n",
+ " \"\"\"Pixels that span two regions densely\"\"\"\n",
+ " np.random.seed(42)\n",
+ " # genomic region_1\n",
+ " pixels_1 = [\n",
+ " {\n",
+ " \"chrom\": tup[0],\n",
+ " \"start_1\": tup[1],\n",
+ " \"start_2\": tup[2],\n",
+ " \"start_3\": tup[3],\n",
+ " \"count\": np.random.randint(0, 10),\n",
+ " }\n",
+ " for tup in product(\n",
+ " [\"chr1\"],\n",
+ " np.arange(900_000, 1_150_000, 50_000),\n",
+ " np.arange(900_000, 1_150_000, 50_000),\n",
+ " np.arange(900_000, 1_150_000, 50_000),\n",
+ " )\n",
+ " ]\n",
+ " # genomic region_2\n",
+ " pixels_2 = [\n",
+ " {\n",
+ " \"chrom\": tup[0],\n",
+ " \"start_1\": tup[1],\n",
+ " \"start_2\": tup[2],\n",
+ " \"start_3\": tup[3],\n",
+ " \"count\": np.random.randint(0, 10),\n",
+ " }\n",
+ " for tup in product(\n",
+ " [\"chr2\"],\n",
+ " np.arange(900_000, 1_150_000, 50_000),\n",
+ " np.arange(900_000, 1_150_000, 50_000),\n",
+ " np.arange(900_000, 1_150_000, 50_000),\n",
+ " )\n",
+ " ]\n",
+ " return pd.concat((pd.DataFrame(pixels_1), pd.DataFrame(pixels_2)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pixels = Pixels(complete_synthetic_pixels(), number_fragments=3, binsize=50_000)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Then we define the target regions we are interested in."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "target_regions = pd.DataFrame(\n",
+ " {\n",
+ " \"chrom\": [\"chr1\", \"chr2\"],\n",
+ " \"start\": [900_000, 900_000],\n",
+ " \"end\": [1_100_000, 1_100_000],\n",
+ " }\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We are then interested in selecting all contacts that are contained within these pixels and then calculate the distance to them. The selection step can be done with the `Overlap` class that we described above. The distance transformation can be done with the `DistanceTransformation` query step. This query step takes an instance of genomic data that contains regions (as defined by its schema) and calculates the distance to all position columns. All distances are calculated with regard to the center of each assigned region. Since genomic positions are defined by a start and end, the `DistanceTransformation` query step has a `DistanceMode` parameter that defines whether we would like to calculate the distance with regard to the start of a genomic position, the end or its center."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "query_steps = [\n",
+ " Overlap(target_regions, anchor_mode=Anchor(mode=\"ANY\")),\n",
+ " DistanceTransformation(\n",
+ " distance_mode=DistanceMode.LEFT,\n",
+ " ),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can then execute this query plan using the Query class. This will add a distance column to the genomic dataset returned."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " chrom_1 \n",
+ " chrom_2 \n",
+ " chrom_3 \n",
+ " region_chrom \n",
+ " distance_1 \n",
+ " distance_2 \n",
+ " distance_3 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " -50000.0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " 50000.0 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " chr1 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " 100000.0 \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 245 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " -100000.0 \n",
+ " \n",
+ " \n",
+ " 246 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " -50000.0 \n",
+ " \n",
+ " \n",
+ " 247 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " 248 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " 50000.0 \n",
+ " \n",
+ " \n",
+ " 249 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " chr2 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
250 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " chrom_1 chrom_2 chrom_3 region_chrom distance_1 distance_2 distance_3\n",
+ "0 chr1 chr1 chr1 chr1 -100000.0 -100000.0 -100000.0\n",
+ "1 chr1 chr1 chr1 chr1 -100000.0 -100000.0 -50000.0\n",
+ "2 chr1 chr1 chr1 chr1 -100000.0 -100000.0 0.0\n",
+ "3 chr1 chr1 chr1 chr1 -100000.0 -100000.0 50000.0\n",
+ "4 chr1 chr1 chr1 chr1 -100000.0 -100000.0 100000.0\n",
+ ".. ... ... ... ... ... ... ...\n",
+ "245 chr2 chr2 chr2 chr2 100000.0 100000.0 -100000.0\n",
+ "246 chr2 chr2 chr2 chr2 100000.0 100000.0 -50000.0\n",
+ "247 chr2 chr2 chr2 chr2 100000.0 100000.0 0.0\n",
+ "248 chr2 chr2 chr2 chr2 100000.0 100000.0 50000.0\n",
+ "249 chr2 chr2 chr2 chr2 100000.0 100000.0 100000.0\n",
+ "\n",
+ "[250 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "Query(query_steps=query_steps)\\\n",
+ " .build(pixels)\\\n",
+ " .compute()\\\n",
+ " .filter(regex=r\"chrom|distance\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Aggregating genomic data based on its distance to a target region\n",
+ "In this example, we extend the above use-case to aggregate the results based on the distance columns added. This is a common use-case to calculate aggregate statistics for different distance levels. To achieve this, we employ the same query plan as above and extend it using the `DistanceAggregation` query step."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from spoc.query_engine import DistanceAggregation, AggregationFunction"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `DistanceAggregation` class requires the following parameters:\n",
+ "- `value_column`: This specifies the value column to aggregate\n",
+ "- `function`: The aggregation function to use. This is the enumerated type `AggregationFunction`\n",
+ "- `densify_output`: Whether missing distance values should be filled with empty values (specific empty value depends on the aggregation function)\n",
+ "\n",
+ "Note that there are two different average functions available, `AVG` and `AVG_WITH_EMPTY`. `AVG` performs an average over all available columns, whereas `AVG_WITH_EMPTY` counts missing distances per region as 0."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "query_steps = [\n",
+ " Overlap(target_regions, anchor_mode=Anchor(mode=\"ALL\")),\n",
+ " DistanceTransformation(),\n",
+ " DistanceAggregation(\n",
+ " value_column='count',\n",
+ " function=AggregationFunction.AVG,\n",
+ " ),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " distance_1 \n",
+ " distance_2 \n",
+ " distance_3 \n",
+ " count \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " 4.5 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " -50000.0 \n",
+ " 3.0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " 0.0 \n",
+ " 5.5 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " 50000.0 \n",
+ " 5.0 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " 100000.0 \n",
+ " 6.0 \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 120 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " -100000.0 \n",
+ " 8.0 \n",
+ " \n",
+ " \n",
+ " 121 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " -50000.0 \n",
+ " 4.5 \n",
+ " \n",
+ " \n",
+ " 122 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " 0.0 \n",
+ " 4.5 \n",
+ " \n",
+ " \n",
+ " 123 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " 50000.0 \n",
+ " 4.5 \n",
+ " \n",
+ " \n",
+ " 124 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
125 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance_1 distance_2 distance_3 count\n",
+ "0 -100000.0 -100000.0 -100000.0 4.5\n",
+ "1 -100000.0 -100000.0 -50000.0 3.0\n",
+ "2 -100000.0 -100000.0 0.0 5.5\n",
+ "3 -100000.0 -100000.0 50000.0 5.0\n",
+ "4 -100000.0 -100000.0 100000.0 6.0\n",
+ ".. ... ... ... ...\n",
+ "120 100000.0 100000.0 -100000.0 8.0\n",
+ "121 100000.0 100000.0 -50000.0 4.5\n",
+ "122 100000.0 100000.0 0.0 4.5\n",
+ "123 100000.0 100000.0 50000.0 4.5\n",
+ "124 100000.0 100000.0 100000.0 0.0\n",
+ "\n",
+ "[125 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "Query(query_steps=query_steps)\\\n",
+ " .build(pixels)\\\n",
+ " .compute()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In addition, we can also aggregate on a subset of distance positions, using the `position_list` parameter:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "query_steps = [\n",
+ " Overlap(target_regions, anchor_mode=Anchor(mode=\"ALL\")),\n",
+ " DistanceTransformation(),\n",
+ " DistanceAggregation(\n",
+ " value_column='count',\n",
+ " function=AggregationFunction.AVG,\n",
+ " position_list=[1,2]\n",
+ " ),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " distance_1 \n",
+ " distance_2 \n",
+ " count \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " -100000.0 \n",
+ " -100000.0 \n",
+ " 4.8 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " -100000.0 \n",
+ " -50000.0 \n",
+ " 4.5 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " -100000.0 \n",
+ " 0.0 \n",
+ " 5.3 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " -100000.0 \n",
+ " 50000.0 \n",
+ " 4.7 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " -100000.0 \n",
+ " 100000.0 \n",
+ " 5.3 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " -50000.0 \n",
+ " -100000.0 \n",
+ " 4.8 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " -50000.0 \n",
+ " -50000.0 \n",
+ " 4.4 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " -50000.0 \n",
+ " 0.0 \n",
+ " 4.5 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " -50000.0 \n",
+ " 50000.0 \n",
+ " 5.4 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " -50000.0 \n",
+ " 100000.0 \n",
+ " 3.4 \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " 0.0 \n",
+ " -100000.0 \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 11 \n",
+ " 0.0 \n",
+ " -50000.0 \n",
+ " 3.5 \n",
+ " \n",
+ " \n",
+ " 12 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 4.4 \n",
+ " \n",
+ " \n",
+ " 13 \n",
+ " 0.0 \n",
+ " 50000.0 \n",
+ " 5.4 \n",
+ " \n",
+ " \n",
+ " 14 \n",
+ " 0.0 \n",
+ " 100000.0 \n",
+ " 4.3 \n",
+ " \n",
+ " \n",
+ " 15 \n",
+ " 50000.0 \n",
+ " -100000.0 \n",
+ " 5.3 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 50000.0 \n",
+ " -50000.0 \n",
+ " 4.7 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 50000.0 \n",
+ " 0.0 \n",
+ " 4.0 \n",
+ " \n",
+ " \n",
+ " 18 \n",
+ " 50000.0 \n",
+ " 50000.0 \n",
+ " 4.2 \n",
+ " \n",
+ " \n",
+ " 19 \n",
+ " 50000.0 \n",
+ " 100000.0 \n",
+ " 6.1 \n",
+ " \n",
+ " \n",
+ " 20 \n",
+ " 100000.0 \n",
+ " -100000.0 \n",
+ " 5.4 \n",
+ " \n",
+ " \n",
+ " 21 \n",
+ " 100000.0 \n",
+ " -50000.0 \n",
+ " 2.8 \n",
+ " \n",
+ " \n",
+ " 22 \n",
+ " 100000.0 \n",
+ " 0.0 \n",
+ " 3.6 \n",
+ " \n",
+ " \n",
+ " 23 \n",
+ " 100000.0 \n",
+ " 50000.0 \n",
+ " 5.2 \n",
+ " \n",
+ " \n",
+ " 24 \n",
+ " 100000.0 \n",
+ " 100000.0 \n",
+ " 4.3 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance_1 distance_2 count\n",
+ "0 -100000.0 -100000.0 4.8\n",
+ "1 -100000.0 -50000.0 4.5\n",
+ "2 -100000.0 0.0 5.3\n",
+ "3 -100000.0 50000.0 4.7\n",
+ "4 -100000.0 100000.0 5.3\n",
+ "5 -50000.0 -100000.0 4.8\n",
+ "6 -50000.0 -50000.0 4.4\n",
+ "7 -50000.0 0.0 4.5\n",
+ "8 -50000.0 50000.0 5.4\n",
+ "9 -50000.0 100000.0 3.4\n",
+ "10 0.0 -100000.0 2.0\n",
+ "11 0.0 -50000.0 3.5\n",
+ "12 0.0 0.0 4.4\n",
+ "13 0.0 50000.0 5.4\n",
+ "14 0.0 100000.0 4.3\n",
+ "15 50000.0 -100000.0 5.3\n",
+ "16 50000.0 -50000.0 4.7\n",
+ "17 50000.0 0.0 4.0\n",
+ "18 50000.0 50000.0 4.2\n",
+ "19 50000.0 100000.0 6.1\n",
+ "20 100000.0 -100000.0 5.4\n",
+ "21 100000.0 -50000.0 2.8\n",
+ "22 100000.0 0.0 3.6\n",
+ "23 100000.0 50000.0 5.2\n",
+ "24 100000.0 100000.0 4.3"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "Query(query_steps=query_steps)\\\n",
+ " .build(pixels)\\\n",
+ " .compute()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "spoc-dev",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/snipping.ipynb b/notebooks/snipping.ipynb
deleted file mode 100644
index 7055a14..0000000
--- a/notebooks/snipping.ipynb
+++ /dev/null
@@ -1,322 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Snipping examples\n",
- "\n",
- "## Background\n",
- "\n",
- "Snipping is the act of extraction rectangular \"snippets\" from a dataset containing pore-c interactions in the form of pixels. For example, we might have a file containing two-way interactions (similar to conventional Hi-C) in the form of a matrix of genomic bins, that encode the interaction frequency of these pixels. We might want to extract the neighborhood of TAD-boundaries form this file, specifically, neighborhoods centered at TAD-boundaries that stretch 1MB upstream and downstream. The results of this extraction is a 3-dimensional matrix, where the first two dimensions encode the relative genomic offset and the third dimension encodes the particular examples, here TAD-boundaries.\n",
- "\n",
- "## Average representation of snippets\n",
- "\n",
- "Snipping is often the first step of an anlysis pipeline, where the extraction of snippets is followed by reducing them along the example dimension to obtain an average representation of a particular genomic neighborhood.\n",
- "\n",
- "## Higher-order contacts and snipping\n",
- "\n",
- "If we want to extract snippets from a higher-order contact file, things get a little more complicated than for two-way contacts. Specifically, we are often interested in obtaining a two-dimensional representation of a certain high-dimensional snippet to enable visualization. To do this, we need to define different ways of snipping from a high-dimensional file. For example, we might want to know, which two-dimensional contacts are associated with a set of third coordinates, like TAD-boundaries. Or we might want to know which genomic positions are associated with a two-dimensional query. To account for this, spoc implements different \"snipping strategies\" that encapsulate the different ways we might want to query a high-dimensional contact file."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "import matplotlib.pyplot as plt\n",
- "import numpy as np\n",
- "import seaborn as sbn"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Snipper class\n",
- "The snipper class is spoc's central interface to access snipping functionality. It can be used with different snipping strategies to extract snippets. The idea is that a snipper can hold different snipping strategies that can be deployed to achieve the desired result."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "from spoc.snipping import Snipper"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Snipping strategies and snipping vlaues\n",
- "\n",
- "### Snipping strategies\n",
- "\n",
- "Snipping strategies are specific ways of snipping higher order contacts. For example, the `Triplet1DSnippingStrategy` implements snipping 2D pieces from triplets based on a set of 1d-coordinates. This can be used to ask questions like:\n",
- "\n",
- "- Do sister chromatids form internal loops, when they contact each other?\n",
- "\n",
- "The idea is that a specific snipping strategy encapsulates all information of extracting snippets such as the size of the snippets, potential offsets, values etc. The constructor of a specific snipping strategy is specific to that strategy, but the snipping implementation has a unified strategy to allow composing different strategies in a snipper class.\n",
- "\n",
- "### Snipping values\n",
- "\n",
- "Usually, the values that are extracted from a target file can be either ICCF (iteratively corrected contact frequencies), or observed-over-expected values. The latter represents the ratio of the actually observed contacts and the expected contacts for randomly chosen genomic regions."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "from spoc.snipping.snipping_strategies import Triplet1DSnippingStrategy, SnippingValues"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [],
- "source": [
- "snip_strat_center = Triplet1DSnippingStrategy(bin_size=100_000,\n",
- " half_window_size=2_000_000,\n",
- " snipping_value=SnippingValues.ICCF)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [],
- "source": [
- "snipper = Snipper([snip_strat_center])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Here the snipper is instantiated with a single snipping strategy, that snips ICCF values from the center of a set of regions"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "metadata": {},
- "outputs": [],
- "source": [
- "tad_boundaries = pd.read_csv(\"../playground/tad_boundaries.csv\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- TADs as target regions are loaded"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 30,
- "metadata": {},
- "outputs": [],
- "source": [
- "result = snipper.snip(\"../playground/test.parquet\", tad_boundaries, threads=40)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Snipper is used to snip ICCF values around TAD-boundaries with a 1Mbp half-window size"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjEAAAHPCAYAAACiH4sAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy89olMNAAAACXBIWXMAAA9hAAAPYQGoP6dpAACs6klEQVR4nOzde1wUdfs//tceYDm5i6CwkOcoxUNqULqKmoqgkmSS3hipJemdguXhR0a3Z++CvFVS85a7suyAaXanHw+fyE1DTPGEkSfEusvooy54h7ByWvb0+8MvUxvoXMawcriePubxYHeuveY9M7vre98zc43MbrfbwRhjjDHWzMjvdQMYY4wxxv4M7sQwxhhjrFniTgxjjDHGmiXuxDDGGGOsWeJODGOMMcaaJe7EMMYYY6xZ4k4MY4wxxpol7sQwxhhjrFniTgxjjDHGmqUW3YlZtmwZ+vXrd9v5W7Zsgbe3t9PawxhjjDHpNKgTYzabsXDhQvTp0weenp4IDAzE1KlTcfXqVYe4kpISxMXFQa1Ww9vbG/Hx8SgvL3eIOXPmDIYMGQI3Nzd07NgRq1atqrO8HTt2oEePHnBzc0OfPn3wv//7vw1pPmOMMcaaMWVDXlxZWYnTp09j8eLF6Nu3L27cuIGXXnoJ0dHROHXqlBAXFxeHa9euQa/Xw2w247nnnsPMmTOxdetWAIDRaERERATCw8ORnp6Os2fPYvr06fD29sbMmTMBAEePHsXkyZORkpKCxx9/HFu3bsX48eNx+vRp9O7duyGr0WBlU0aS4q4cdSPFXS5vIxpTLaP1P28oaXFmmXhMFSEGAKy0MJhk4rftMhGXKSfeAcxGyGcFLRmxaaRsCmI26noqCDEq4jLVNtoyCbsTJZSGASiWWUhx1RBvXCXxHXnTbibFme20DVJpqxGPIS7TSlymxS6+rlVW8XYBgFxGe3+YbeLLNBG2BQBUmKtJcUq5+BuptLqClEuldCHFWQjrqVLQcpXc/J4U1xDm//4oSR6Xdt0kydNYGjQSo9FooNfrMWnSJHTv3h0DBw7EW2+9hdzcXBQWFgIA8vPzkZmZiXfffRcDBgxAWFgYNmzYgG3btgkjNhkZGaipqcF7772HXr16ITY2Fi+++CLWrl0rLGvdunUYPXo0kpKSEBwcjJUrV+Lhhx/GW2+9RW7vf/7zH3Tr1g2JiYn4/X0vd+3ahQceeABubm6IjIzEL7/80pDNwhhjjDEnkPycmLKyMshkMuFck5ycHHh7eyM0NFSICQ8Ph1wux/Hjx4WYoUOHwtXVVYiJjIxEQUEBbty4IcSEh4c7LCsyMhI5OTmkdp05cwZhYWF4+umn8dZbb0H2/35lVFZW4rXXXsOHH36II0eOoLS0FLGxsX96/RljjLF7zmaVZmriJO3EVFdXY+HChZg8eTLUajUAwGAwwM/PzyFOqVTCx8cHBoNBiPH393eIqX0sFlM7/06OHj2Kxx57DP/f//f/4e9//7vDPLPZjLfeegs6nQ4hISH44IMPcPToUZw4ceIu1pwxxhhrQuw2aaYmjtyJycjIgJeXlzAdPnzYYb7ZbMakSZNgt9uxadMmyRv6ZxUWFmLUqFFYsmQJFixYUGe+UqnEI488Ijzu0aMHvL29kZ+fX28+k8kEo9HoMJmsTX9HM8YYYy0NuRMTHR2NvLw8Yfr94aHaDszPP/8MvV4vjMIAgFarRXFxsUMui8WCkpISaLVaIaaoqMghpvaxWEzt/Ntp3749Hn30UXzyyScwGo3U1b2tlJQUaDQah2ntucsNzssYY4xJxmaTZmriyJ2YNm3aICgoSJjc3d0B/NaB+f777/HVV1/B19fX4XU6nQ6lpaXIzc0Vnjt48CBsNhsGDBggxGRnZ8Ns/u1Mfb1ej+7du6Nt27ZCzIEDBxxy6/V66HS6O7bb3d0de/fuFU7avXnzpsN8i8XicCVVQUEBSktLERwcXG++5ORklJWVOUzze3e5
YxsYY4wxZ7LbbZJMTV2D68Q89dRTOHXqFDIyMmC1WmEwGGAwGFBTc+uSuuDgYIwePRozZszAiRMncOTIESQmJiI2NhaBgYEAgKeffhqurq6Ij4/H+fPnsX37dqxbtw7z588XlvXSSy8hMzMTa9aswcWLF7Fs2TKcOnUKiYmJou309PTEvn37oFQqMWbMGIcaNS4uLpgzZw6OHz+O3NxcPPvssxg4cCAeffTRenOpVCqo1WqHSaVo0TUDGWOMNTc8EiPuypUr2L17N/7v//4P/fr1Q0BAgDAdPXpUiMvIyECPHj0wcuRIjB07FmFhYXj77beF+RqNBvv378dPP/2EkJAQLFiwAEuWLBFqxADAoEGDsHXrVrz99tvo27cvPvvsM+zatYtcI8bLywtffPEF7HY7oqKiUFFxq4aAh4cHFi5ciKeffhqDBw+Gl5cXtm/f3pDNwhhjjDEnkNl/XzCF/Snnuj1Oius4jFbI6doRV9GYq2VepFylMlo9Q5NcvLCViVj8qprYNaaU+aohVpSjvokpRf0sxGzUXwCU3zLUwnPUCx5dJfxU+xIXqiQs00Asr1kspy2UUuyuhFrEjrSngEo7rRBfFWG51K9fK7FtJkKxuwor7XuIUsQOoBXFq7TQlklFKdhHLRBoJY42mKzi+1NBLEJqrJCmEN2d1PzynSR5XDv2lSRPY2lQxV7GGGOMNUHNoMaLFPhkDsYYY4w1SzwSwxhjjLU0zeDKIilwJ4YxxhhraZrBlUVS4MNJjDHGGGuWeCSGMcYYa2GaQ6E6KbTokZisrCzIZDKUlpbWO//y5cuQyWTIy8tzarsYY4yxRsXF7mg+//xzREREwNfX97YdAoPBgClTpkCr1cLT0xMPP/ww/v3vfzvElJSUIC4uDmq1Gt7e3oiPj3eorAsAZ86cwZAhQ+Dm5oaOHTti1apVDW0+Y4wxxpqpBh9OqqioQFhYGCZNmoQZM2bUGzN16lSUlpZi9+7daNeuHbZu3YpJkybh1KlT6N+/PwAgLi4O165dg16vh9lsxnPPPYeZM2di69atAACj0YiIiAiEh4cjPT0dZ8+exfTp0+Ht7e1Q2fdeuFpBKzynPELr1QYOF7++X3m4jJTLpaQNKe6mVfytUClX0JZpl64oHrHWHZmKUFushljUj1pPzkZIJycmcyEuk/LrxIW4TBWxIJuNsLdo7yDAjfj7ykTYC54y2lIrqTuUWEBSSdgeJhmtlofJRiuw50LYHgritrXKaN9X1AJvFGZCsT7qMpXE7ysTqewmoCJ8+ppU7Vg+nEQzZcoULFmyBOHh4beNOXr0KObMmYNHH30U3bp1w6JFi+Dt7S3cFDI/Px+ZmZl49913MWDAAISFhWHDhg3Ytm0brl69CuDWrQtqamrw3nvvoVevXoiNjcWLL76ItWvXkttaWVmJMWPGYPDgwQ6HmC5evIhBgwbBzc0NvXv3xqFDh/7cxmCMMcaaAptVmqmJc8o5MYMGDcL27dtRUlICm82Gbdu2obq6Go899hgAICcnB97e3ggNDRVeEx4eDrlcjuPHjwsxQ4cOhavrbyX5IyMjUVBQgBs3boi2obS0FKNGjYLNZoNer4e3t7cwLykpCQsWLMC3334LnU6HcePG4ddff5Vm5RljjDFns9ukmZo4p3RiPv30U5jNZvj6+kKlUuGvf/0rdu7ciaCgIAC3zpnx8/NzeI1SqYSPjw8MBoMQ4+/v7xBT+7g25nYMBgOGDRuGgIAA7NmzBx4eHg7zExMTERMTg+DgYGzatAkajQabN29u0DozxhhjrHGROzEZGRnw8vISpsOHD5MXsnjxYpSWluKrr77CqVOnMH/+fEyaNAlnz579U42+W6NGjUJQUBC2b9/uMJJTS6fTCX8rlUqEhoYiPz+/3lwmkwlGo9FhqiEex2WMMcacopVcnUQ+sTc6OhoDBgwQHt93332k1/3nP//BW2+9hXPnzqFXr14AgL59++Lw4cPYuHEj0tPTodVq
UVxc7PA6i8WCkpISaLVaAIBWq0VRUZFDTO3j2pjbiYqKwr///W9cuHABffr0IbX7dlJSUrB8+XKH557x6IUpXr0blJcxxhiTTDM4FCQF8khMmzZtEBQUJEzu7u6k11VWVt5akNxxUQqFArb/18vT6XQoLS0VTvQFgIMHD8JmswkdJ51Oh+zsbJjNv51Jrtfr0b17d7Rt2/aObUhNTcW0adMwcuRIXLhwoc78Y8eOCX9bLBbk5uYiODi43lzJyckoKytzmP7iWX8sY4wxxhpPgy+xLikpQWFhoXAVUUFBAYBboyNarRY9evRAUFAQ/vrXv2L16tXw9fXFrl27oNfrsXfvXgBAcHAwRo8ejRkzZiA9PR1msxmJiYmIjY1FYGAgAODpp5/G8uXLER8fj4ULF+LcuXNYt24d0tLSSO1cvXo1rFYrRowYgaysLPTo0UOYt3HjRjzwwAMIDg5GWloabty4genTp9ebR6VSQaVSOTznSryEkzHGGHOKZnAoSAoN7sTs3r0bzz33nPA4NjYWALB06VIsW7YMLi4u+N///V+88sorGDduHMrLyxEUFIQPPvgAY8eOFV6XkZGBxMREjBw5EnK5HDExMVi/fr0wX6PRYP/+/UhISEBISAjatWuHJUuW3FWNmLS0NIeOTO35MampqUhNTUVeXh6CgoKEejaMMcZYc2RvJedqyuxNqjpP87QjII4U195WQ4rr4GsUjfEfQRv9uX6IViSr+Ffxgn1Ga92ToutTTix+ZZKLFwMzEwvPmYlV8Shbo4aYi1qIj/IBo47lUb+WKEXxFMRPvq+F9ovORthXvypoW+0asXGVhIJs5cStVgPaepbZacXRLIRzEkzEtlG/pikXGVQS209dpskmnq/CaqItk1hC0krYthZigcAaapxVPE4hp333XS8rIMU1RPV3/ytJHre+Y8WD7iG+ASRjjDHW0rSSE3u5E8MYY4y1NHxODGOMMcaapVYyEuOUir2MMcYYY1LjkRjGGGOspWkGN2+UAndiGGOMsZaGDyc1f1lZWZDJZCgtLa13/uXLlyGTyZCXl+fUdjHGGGOs4Rrcifn8888REREBX1/fO3YIcnJyMGLECHh6ekKtVmPo0KGoqqoS5peUlCAuLg5qtRre3t6Ij49HeXm5Q44zZ85gyJAhcHNzQ8eOHbFq1aqGNp8xxhhrefgGkDQVFRUICwvDpEmTMGPGjHpjcnJyMHr0aCQnJ2PDhg1QKpX47rvvHO6nFBcXh2vXrkGv18NsNuO5557DzJkzsXXrVgCA0WhEREQEwsPDkZ6ejrNnz2L69Onw9va+q6q9jaGcULQNABR2SgkyQFHSRjRGfki8IB4A+I1yI8Ups8tEY1yLxAviAYCyRiUeBKDaKt6HriYWzrMSi+JVEPaVilgEjlol0kZomrwJl5yk/tJREIqjyYklAt2IcRZCnAexlCB1F3jIaF+bNyFeBE5hp23dGmJRPBthLaj7QE787NXIxIvAyYmfTxv1s0c4VKIgtl9G3B4ucvH3EWX7O00rOZzU4E7MlClTANw6NHM78+bNw4svvohXXnlFeK579+7C3/n5+cjMzMTJkycRGhoKANiwYQPGjh2L1atXIzAwEBkZGaipqcF7770HV1dX9OrVC3l5eVi7di25E1NZWYmYmBgYjUbs27dPeP7ixYuYPXs2Tp8+jaCgIGzcuBHDhg27m83AGGOMMSdr9HNiiouLcfz4cfj5+WHQoEHw9/fHsGHD8M033wgxOTk58Pb2FjowABAeHg65XI7jx48LMUOHDhXudwQAkZGRKCgowI0bN0TbUVpailGjRsFms0Gv18Pb21uYl5SUhAULFuDbb7+FTqfDuHHj8Ouvv0qw9owxxtg90EoOJzV6J+bHH38EACxbtgwzZsxAZmYmHn74YYwcORLff/89AMBgMMDPz8/hdUqlEj4+PjAYDEKMv7+/Q0zt49qY2zEYDBg2bBgCAgKwZ88eeHh4OMxPTExETEwM
goODsWnTJmg0GmzevPnPrzRjjDF2L3EnxlFGRga8vLyE6fDhw6TX2f7fRvjrX/+K5557Dv3790daWhq6d++O995778+1+i6NGjUKQUFB2L59u8NITi2dTif8rVQqERoaivz8/HpzmUwmGI1Gh8ncSu4WyhhjjDUl5E5MdHQ08vLyhOn3h37uJCAgAADQs2dPh+eDg4NRWFgIANBqtSguLnaYb7FYUFJSAq1WK8QUFRU5xNQ+ro25naioKGRnZ+PChQukNt9JSkoKNBqNw7Tv5vkG52WMMcakYrdbJZmaOnInpk2bNggKChImd3d30uu6dOmCwMBAFBQ43nr80qVL6Ny5M4BbIyGlpaXIzc0V5h88eBA2mw0DBgwQYrKzs2E2/3bGv16vR/fu3dG2bds7tiE1NRXTpk3DyJEj6+3IHDt2TPjbYrEgNzcXwcHB9eZKTk5GWVmZwxTVppfIVmCMMcaciA8n0ZSUlCAvL0/oHBQUFCAvL084T0UmkyEpKQnr16/HZ599hh9++AGLFy/GxYsXER8fD+DWqMzo0aMxY8YMnDhxAkeOHEFiYiJiY2MRGBgIAHj66afh6uqK+Ph4nD9/Htu3b8e6deswf/58UjtXr16NuLg4jBgxAhcvXnSYt3HjRuzcuRMXL15EQkICbty4genTp9ebR6VSQa1WO0wuMtolnIwxxphT2G3STHfpypUreOaZZ+Dr6wt3d3f06dMHp06daoQVvKXBl1jv3r0bzz33nPA4NjYWALB06VIsW7YMADB37lxUV1dj3rx5KCkpQd++faHX63H//fcLr8vIyEBiYiJGjhwJuVyOmJgYrF+/Xpiv0Wiwf/9+JCQkICQkBO3atcOSJUvuqkZMWloarFYrRowYgaysLOH8mNTUVKSmpiIvLw9BQUHYvXs32rVr15DNwhhjjLUqN27cwODBgzF8+HB88cUXaN++Pb7//nvRoyUNIbPbCRWq2B2ld3yGFNeGeHixrVW8eJSPooaUKzBQvIgdAPiM1IjGGA+LX8oOAMVXxIv1AUBljXjxvwobrZ9dQxxUtBCKbpmJhbnMtDASG7HgFpVcwqJbvlZqoTVxxUra/rxOHNyslomvZ5mM9mvSRNxmFRD/fAKAmZDvpl28IB4AmCUsXFZFXGaVjdo26c6bqLSaSHE1NvF9QC12ZyG2v9xcJRpjJ76HDKX1XzgipaoDb0uSx30kfaDglVdewZEjR8gX/kihRd87iTHGGGuVJDqcVN8VuSZT/Z3N3bt3IzQ0FBMnToSfnx/69++Pd955p1FXkzsxjDHGGKtXfVfkpqSk1Bv7448/YtOmTXjggQfw5ZdfYtasWXjxxRfxwQcfNFr7GnxODGOMMcaaGImuLEpOTq5zAY1KVf/98Ww2G0JDQ/H6668DAPr3749z584hPT0d06ZNk6Q9f8SdGMYYY6ylkeg8KpVKddtOyx8FBATUWxPu3//+tyRtqQ8fTmKMMcZYgw0ePPiONeEaA4/EMMYYYy3NPShUN2/ePAwaNAivv/46Jk2ahBMnTuDtt9/G229Lc6VUfVr0SMyWLVsc7lb9R1lZWZDJZCgtLXVamxhjjLFGdw8q9j7yyCPYuXMnPvnkE/Tu3RsrV67Em2++ibi4uEZaSQk6MZ9//jkiIiLg6+sLmUyGvLy828ba7XaMGTMGMpkMu3btcphXWFiIqKgoeHh4wM/PD0lJSbBYHGsBZGVl4eGHH4ZKpUJQUBC2bNnS0OYzxhhjTCKPP/44zp49i+rqauTn52PGjBmNurwGH06qqKhAWFgYJk2aJNrYN998E7J6ColZrVZERUVBq9Xi6NGjuHbtGqZOnQoXFxfhLOeffvoJUVFReOGFF5CRkYEDBw7g+eefR0BAACIjIxu6Gg1iItYpcyF2GSvt4pW+lNa6d+Ouj2uRFylOkV0qGuM9yp+W6/A1UtyNK56iMW5V4gXxAKDKTIujbFu7nbZDaSXPgGpC0S0FsUiWlJUpqeX13GTS
FTNzt9Gq2LnLqQUHxeNciWtK/c3pSvztZ7KLv0NUxFzEzYFqQuE2OXF7KKjbjfD+NhOK0wG0InYA4KkQP9GUWoTPSjwBto2Lh2hMNbFYn1NIWCCxKWvwSMyUKVOwZMkShIeH3zEuLy8Pa9aswXvvvVdn3v79+3HhwgV8/PHH6NevH8aMGYOVK1di48aNqKm5VZk2PT0dXbt2xZo1axAcHIzExEQ89dRTSEtLI7f1+vXrCA0NxZNPPulQrOfIkSN46KGH4ObmhoEDB+LcuXPknIwxxliTwzeAlE5lZSWefvppbNy4EVqtts78nJwc9OnTB/7+v/3Sj4yMhNFoxPnz54WYP3aUIiMjkZOTQ2rDL7/8giFDhqB379747LPPHC4ZS0pKwpo1a3Dy5Em0b98e48aNc7hbNmOMMdas3KMbQDqbUzoxtWcsP/HEE/XONxgMDh0YAMLj2rth3y7GaDSiqurO97QoKCjA4MGDERkZiffffx8KheOQ9tKlSzFq1Cj06dMHH3zwAYqKirBz5867WkfGGGOMORe5E5ORkQEvLy9hot7gaffu3Th48CDefPPNP9vGBqmqqsKQIUMwYcIErFu3rt5zcnQ6nfC3j48Punfvjvz8+m/QVd99JKg3EGOMMcacgg8nOYqOjkZeXp4whYaGkl538OBB/Oc//4G3tzeUSiWU/+8utjExMXjssccAAFqtFkVFRQ6vq31ce/jpdjFqtRru7u63Xb5KpUJ4eDj27t2LK1eukNp8J/XdR0JvPN/gvIwxxphk+HCSozZt2iAoKEiY7tRx+L1XXnkFZ86ccegAAUBaWhref/99ALdGQs6ePYvi4mLhdXq9Hmq1WihhrNPpcODAAYfcer3eYRSl3hWUy/HRRx8hJCQEw4cPx9WrV+vEHDt2TPj7xo0buHTpEoKDg+vNl5ycjLKyModplLqX+IZgjDHGmKQafIl1SUkJCgsLhc5BbclhrVbrMP1Rp06d0LVrVwBAREQEevbsiSlTpmDVqlUwGAxYtGgREhIShBNwX3jhBbz11lt4+eWXMX36dBw8eBCffvop9u3bJ9pGhUKBjIwMTJ48GSNGjEBWVpZDm1asWAFfX1/4+/vjb3/7G9q1a4fx48fXm6u++0goZbTLRhljjDGnaAaHgqTQ4BN7d+/ejf79+yMqKgoAEBsbi/79+yM9PZ2cQ6FQYO/evVAoFNDpdHjmmWcwdepUrFixQojp2rUr9u3bB71ej759+2LNmjV49913yTVilEolPvnkE/Tq1QsjRoxwGPVJTU3FSy+9hJCQEBgMBuzZsweurrQ6LIwxxliT00rOiZHZ7XYp62e1SqmdnyHFqYnvBw9CnLeVlkxNKLgFAO3c73yFFwD4d7lJyuUV3oEUV3X4F9GY0l/cSLkqK2mdTpNZfPCxxkobWTMTi+JVQzwf9dfEvfhKaauoIcVZCdvDYKfdDbeYWBmygrALqmW0r7hyYpyJuBeqCHE3iZ9PBaGoH0Ardldhp5WPqCa2jYJaxK6K2DbK6Df1v7ZfzbTvNcoyTTZa+3/877ekuIao+nSFeBCB+6QlkuRpLHwDSMYYY6ylaSXjE9yJYYwxxlqaZnAoSAot+i7WjDHGGGu5eCSGMcYYa2layUgMd2IYY4yxlqYZFKqTAndiGGOMsZamlYzEtOhzYrZs2QJvb+/bzs/KyoJMJkNpaanT2sQYY4wxaTS4E/P5558jIiICvr6+kMlkwm0FapWUlGDOnDno3r073N3d0alTJ7z44osoKytziCssLERUVBQ8PDzg5+eHpKQkWCyOtQWysrLw8MMPQ6VSISgoCFu2bGlo8xljjLGWx26XZmriGnw4qaKiAmFhYZg0aRJmzJhRZ/7Vq1dx9epVrF69Gj179sTPP/+MF154AVevXsVnn30GALBarYiKioJWq8XRo0dx7do1TJ06FS4uLnj99dcBAD/99BOioqLwwgsvICMjAwcOHMDzzz+PgIAActXexmIGsUgW
sWCVghBWJSfmstEKt6mqxYuQuVyh3a1bQShiBwAeI7qJxsi++ZG2zELa0KlLpYtoTLVJPAYALFbabwCFVbovAjto+91GWCTxLQRXJW2/U77vVDW0ooSUgo8AYCPsAuo2MxG3h42Yz0r4jehJvGVJNbHAnoLQNkoMAHjKaJ8DSstqQCt250ZcpplQ1M9CiAEAV7l0Z1V4KGjFHJ2ilRxOavDemzJlCgDg8uXL9c7v3bs3/v3vfwuP77//frz22mt45plnYLFYoFQqsX//fly4cAFfffUV/P390a9fP6xcuRILFy7EsmXL4OrqivT0dHTt2hVr1qwBAAQHB+Obb75BWloauRNz/fp1jBkzBh07dsS2bduE548cOYLk5GRcunQJ/fr1w7vvvovevXv/yS3CGGOMMWe4J+fElJWVQa1WQ6m81YfKyclBnz594O/vL8RERkbCaDTi/PnzQkx4eLhDnsjISOTk5JCW+csvv2DIkCHo3bs3PvvsM4ebOCYlJWHNmjU4efIk2rdvj3HjxsFsppWPZowxxpqcVnLvJKd3Yv773/9i5cqVmDlzpvCcwWBw6MAAEB4bDIY7xhiNRlRV3fm+PwUFBRg8eDAiIyPx/vvvQ6FwHMJdunQpRo0ahT59+uCDDz5AUVERdu7c+afXkTHGGLun7DZppiaO3InJyMiAl5eXMB0+fPiuF2Y0GhEVFYWePXti2bJld/36P6OqqgpDhgzBhAkTsG7dOsjqOS9Fp9MJf/v4+KB79+7Iz8+vN5/JZILRaHSYqMdeGWOMMSYdcicmOjoaeXl5whQaGnpXC7p58yZGjx6NNm3aYOfOnXBx+e0ELq1Wi6KiIof42sdarfaOMWq1Gu7u7rddrkqlQnh4OPbu3YsrV67cVZvrk5KSAo1G4zBll51vcF7GGGNMKnabXZKpqSN3Ytq0aYOgoCBhulPH4Y+MRiMiIiLg6uqK3bt3w83NzWG+TqfD2bNnUVxcLDyn1+uhVqvRs2dPIebAgQMOr9Pr9Q6jKPWRy+X46KOPEBISguHDh+Pq1at1Yo4dOyb8fePGDVy6dAnBwcH15ktOTkZZWZnDNFTT684bgDHGGHMmPieGpqSkBHl5ebhw4QKAW+ef5OXlCeey1HZgKioqsHnzZhiNRhgMBhgMBlittw7DREREoGfPnpgyZQq+++47fPnll1i0aBESEhKEE3BfeOEF/Pjjj3j55Zdx8eJF/POf/8Snn36KefPmibZRoVAgIyMDffv2xYgRI4S21VqxYgUOHDiAc+fO4dlnn0W7du0wfvz4enOpVCqo1WqHSUm8TJIxxhhj0mlwJ2b37t3o378/oqKiAACxsbHo378/0tPTAQCnT5/G8ePHcfbsWQQFBSEgIECYfvnlVj0RhUKBvXv3QqFQQKfT4ZlnnsHUqVOxYsUKYTldu3bFvn37oNfr0bdvX6xZswbvvvsu+fJqpVKJTz75BL169cKIESMcRn1SU1Px0ksvISQkBAaDAXv27IGrK62eBWOMMdbktJITe2V2ezMoydfELekSR4pT22hFptwIe8SL+N7yIh7T1NjEi1G1dTGRcqnbVJPifLvd+aoyAHAf1YOUy3SkgBRXfll81KymilY+qcZELFRGKJ5nJb437HZioTVCnEJGe294utWQ4myEZRZXeJByFSloRc8qCRX7qohF7G4Sf9KVy2gfPhOhCGYFaBcFEFcBRrv459hEXGYVIRcA2AjraSX+N2MnFg61Ev5zrbTTymRQcgGAjRBXQ9xmZwy00iANUbkxUZI8HglvSZKnsfANIBljjLGWphmczyKFFn0DSMYYY4y1XDwSwxhjjLU0rWQkhjsxjDHGWEvTSk535cNJjDHGGGuWeCSGMcYYa2layeGkFj0Ss2XLFnh7e992flZWFmQyGUpLS53WJsYYY6zR2ezSTE1cgzsxn3/+OSIiIuDr6wuZTIa8vLw6MdXV1UhISICvry+8vLwQExNT5z5IhYWFiIqKgoeH
B/z8/JCUlASLxfGa+6ysLDz88MNQqVQICgrCli1bGtp8xhhjjDVTDT6cVFFRgbCwMEyaNAkzZsyoN2bevHnYt28fduzYAY1Gg8TEREyYMAFHjhwBAFitVkRFRUGr1eLo0aO4du0apk6dChcXF7z++usAgJ9++glRUVF44YUXkJGRgQMHDuD5559HQEAAuWpvY7ESCzRZ6rmDdn0oJZrMxOpX1cRlqmTi/dkqC+3toqykVTtWFooPd8qJRexUI/qQ4uRHL4jGVP1MK+qnLKdtDxcX8eJiZjOtcJ7VSvvdYSMUz6MUpwMAlTutgJfVKp7PpZI2xO1O/AVoJZSBI9YkhCvxR6cX8bcfpZgg9YYlN4kF6twJt0BxuwcD8OV2YsFEclk/cSri7WBon3YaJXmPOkEzqLYrhQa/m6dMmYIlS5YgPDy83vllZWXYvHkz1q5dixEjRiAkJATvv/8+jh49Ktx4cf/+/bhw4QI+/vhj9OvXD2PGjMHKlSuxceNG1NTcevOnp6eja9euWLNmDYKDg5GYmIinnnoKaWlp5LZev34doaGhePLJJ2Ey/fbWPXLkCB566CG4ublh4MCBOHfuXAO2CGOMMXaP8eEkaeTm5sJsNjt0cnr06IFOnTohJ+dW6eWcnBz06dMH/v7+QkxkZCSMRiPOnz8vxPyxoxQZGSnkEPPLL79gyJAh6N27Nz777DPhxpIAkJSUhDVr1uDkyZNo3749xo0bB7OZVrKaMcYYY/dGo3diDAYDXF1d65xg6+/vL9xN2mAwOHRgaufXzrtTjNFoRFXVne/BU1BQgMGDByMyMhLvv/8+FArHIb+lS5di1KhR6NOnDz744AMUFRVh586dd72ujDHGWFNgt9kkmZo6cicmIyMDXl5ewnT48OHGbJdkqqqqMGTIEEyYMAHr1q2DrJ5zRHQ6nfC3j48Punfvjvz8/HrzmUwmGI1Gh8lipx2vZowxxpyCDyc5io6ORl5enjCFhoaSXqfValFTU1PnMuaioiJotVoh5o9XK9U+FotRq9Vwd3e/7fJVKhXCw8Oxd+9eXLlyhdTmO0lJSYFGo3GYjpaJnyzKGGOMOY3dJs3UxJE7MW3atEFQUJAw3anj8HshISFwcXHBgQMHhOcKCgpQWFgojIDodDqcPXsWxcXFQoxer4darUbPnj2FmN/nqI35/ShKfeRyOT766COEhIRg+PDhuHr1ap2Y2hOMAeDGjRu4dOkSgoOD682XnJyMsrIyh2mQpqfIVmCMMcaY1Bp8iXVJSQkKCwuFzkFBwa1LYrVaLbRaLTQaDeLj4zF//nz4+PhArVZjzpw50Ol0GDhwIAAgIiICPXv2xJQpU7Bq1SoYDAYsWrQICQkJwgm4L7zwAt566y28/PLLmD59Og4ePIhPP/0U+/btE22jQqFARkYGJk+ejBEjRiArK0sY4QGAFStWwNfXF/7+/vjb3/6Gdu3aYfz48fXmUqlUDicFA4CSeCkfY4wx5hTN4FCQFBp8Yu/u3bvRv39/REVFAQBiY2PRv39/pKenCzFpaWl4/PHHERMTg6FDh0Kr1eLzzz8X5isUCuzduxcKhQI6nQ7PPPMMpk6dihUrVggxXbt2xb59+6DX69G3b1+sWbMG7777LrlGjFKpxCeffIJevXphxIgRDqM+qampeOmllxASEgKDwYA9e/bA1ZVW64Qxxhhrcmw2aaYmTma3t5JbXTaiRV2eJsW5EouLtSHEeRDfWyri3vW2iif0JJ7A3EZBuzxd7S5eZqqNTzUtVw9SGFwf6y8aYz5xlpTL9COtbTXl4iN15ipisTsL7XcHpfAclbsXbX9azeJtu37di5TrV4tKPAhAuVx8u1ELPhqJA6pVxE1bSSh2V02IAQALsaAmpSiemZirBrQvGTPhvIkKu7QlKygtq7TRCuyZ78GFGdlXDogHNVDFssmS5PFc9okkeRoL3wCSMcYYa2layeEk7sQwxhhjLU0zuLJICi36LtaMMcYYa7l4
JIYxxhhrafhwEmOMMcaao+ZwywAp8OEkxhhjjEkuNTUVMpkMc+fObbRltOhOzOXLlyGTyZCXl3fbGJlMhl27djmtTYwxxliju8f3Tjp58iT+9a9/4aGHHpJwpepySiemvLwciYmJ6NChA9zd3dGzZ0+HYngAUF1djYSEBPj6+sLLywsxMTF17pVUWFiIqKgoeHh4wM/PD0lJSbBYLM5YBcYYY6z5uIedmPLycsTFxeGdd95B27ZtJV4xR045J2b+/Pk4ePAgPv74Y3Tp0gX79+/H7NmzERgYiOjoaADAvHnzsG/fPuzYsQMajQaJiYmYMGECjhw5AgCwWq2IioqCVqvF0aNHce3aNUydOhUuLi54/fXXnbEat0UtHqUCrUqWiRDmQiy4RS12ZyYUBLMSi/WZbbS+cXWN+NtPcZNWOVn5E63wnMxDvJCdyxDazU1l7mdIcYqfjKIxyjLa8WubhbYPLCbxfUAtc+nqSSsGZrOIr4O7kVb0zM1C+2qiHPa3EwriAYC7jbZtrcSffnbi552C8p0AAB4gFP8jFrGj/sKVE9omB+1zXA3aj1Ir4c3rIactk1oUz5VwexmjTbyAp9Pcw0usExISEBUVhfDwcPz9739v1GU5ZSTm6NGjmDZtGh577DF06dIFM2fORN++fXHixAkAQFlZGTZv3oy1a9dixIgRCAkJwfvvv4+jR48KN2fcv38/Lly4gI8//hj9+vXDmDFjsHLlSmzcuBE1NbQ3odVqxfTp09GjRw8UFhYKz1+7dg1jxoyBu7s7unXrhs8++0z6jcAYY4w1MyaTCUaj0WEymW7fWdu2bRtOnz6NlJQUp7TPKZ2YQYMGYffu3bhy5Qrsdju+/vprXLp0CREREQCA3NxcmM1mhIeHC6/p0aMHOnXqhJycHABATk4O+vTpA39/fyEmMjISRqMR58+fF22DyWTCxIkTkZeXh8OHD6NTp07CvMWLFyMmJgbfffcd4uLiEBsbi/z8fKlWnzHGGHMuiQ4npaSkQKPROEy366D88ssveOmll5CRkQE3NzenrKZTDidt2LABM2fORIcOHaBUKiGXy/HOO+9g6NChAACDwQBXV1d4e3s7vM7f3x8Gg0GI+X0HpnZ+7bw7KS8vR1RUFEwmE77++mtoNBqH+RMnTsTzzz8PAFi5ciX0ej02bNiAf/7zn396nRljjLF7xS5RnZjk5GTMnz/f4TmVqv57m+Xm5qK4uBgPP/yw8JzVakV2djbeeustmEwmKBTEm5QRSdqJycjIwF//+lfh8RdffIEhQ4Zgw4YNOHbsGHbv3o3OnTsjOzsbCQkJCAwMdBh9aSyTJ09Ghw4dcPDgQbi7u9eZr9Pp6jy+3RVNJpOpzlCaxW6FknC8lDHGGGtOVCrVbTstfzRy5EicPet43uFzzz2HHj16YOHChZJ3YACJOzHR0dEYMGCA8Pi+++5DVVUVXn31VezcuRNRUVEAgIceegh5eXlYvXo1wsPDodVqUVNTg9LSUofRmKKiImi1WgCAVqsVzqH5/fzaeXcyduxYfPzxx8jJycGIESMatI4pKSlYvny5w3ODNL0Q5t2nQXkZY4wxydyDir1t2rRB7969HZ7z9PSEr69vneelIuk5MW3atEFQUJAwubu7w2w2w2w2Qy53XJRCoYDt/11aEBISAhcXFxw48NvtyQsKClBYWCiMkuh0Opw9exbFxcVCjF6vh1qtRs+ePe/YrlmzZiE1NRXR0dE4dOhQnfm1Jw///nFwcHC9uZKTk1FWVuYwDdTcefmMMcaYU9ls0kxNXKOfE6NWqzFs2DAkJSXB3d0dnTt3xqFDh/Dhhx9i7dq1AACNRoP4+HjMnz8fPj4+UKvVmDNnDnQ6HQYOHAgAiIiIQM+ePTFlyhSsWrUKBoMBixYtQkJCAmmoa86cObBarXj88cfxxRdfICwsTJi3Y8cOhIaGIiwsDBkZGThx4gQ2b95cb576htb4UBJjjDFWV1ZWVqPmd8qJvdu2
bUNycjLi4uJQUlKCzp0747XXXsMLL7wgxKSlpUEulyMmJgYmkwmRkZEOJ9YqFArs3bsXs2bNgk6ng6enJ6ZNm4YVK1aQ2zF37lzYbDaMHTsWmZmZGDRoEABg+fLl2LZtG2bPno2AgAB88sknoqM7jDHGWJPVSm4AKbPbqSWv2O0s6RJHinMhbml3QlE5N2Iu6jI9CaOGGhut6JmnnRinEC9s5elGqwHkpaYVmfJsL57PrZealEsZNkA8CID15GnRGEvhDVquctrwrrVSPMZupVVQU7ahvYnshF1VUlj3xPp648o8SHHVVvFR0Jsy2m+1cjnt6LqJUBgSACoJ6cqJB/RpnyigSia+ryqJxe4qiUulFPusIS7TTCzQZiUs00xcZoWNVoCRwk4sfLr/l0zJlnk7N18YLUmeNumN39aGaNH3TmKMMcZYy+WUw0mMMcYYc57WcpCFOzGMMcZYS9NKzonhTgxjjDHW0rSSTgyfE8MYY4yxZolHYhhjjLEWRqp7JzV1LXok5vLly5DJZLe9DxIAyGQy7Nq1y2ltYowxxhqdRHexbuqc1onJz89HdHQ0NBoNPD098cgjj6CwsFCYX11djYSEBPj6+sLLywsxMTHCvZFqFRYWIioqCh4eHvDz80NSUhIsFvFaI4wxxhhreZxyOOk///kPwsLCEB8fj+XLl0OtVuP8+fNwc3MTYubNm4d9+/Zhx44d0Gg0SExMxIQJE3DkyBEAt27nHRUVBa1Wi6NHj+LatWuYOnUqXFxc8PrrrztjNW7LRCyqpJSwz0jtH1OXaCPU7zKDVuSrhrhUF5t4nIuZdksHUzXtraw0ihfwUvxkJOWSqcSL2AGAYvAg8VzuuaRc8l/+S4szihfwstfQ3kUKNW3b2qrFt62rilZAzc1Fuh8nVivt/WijfAgAyGlhsBKK4lFvTWMiLpOSzkYs1udC/O+BUuzuJmj7001G3FeEmAo7cZly2ncMpZBdOXGZTtH0b3skCaeMxPztb3/D2LFjsWrVKvTv3x/3338/oqOj4efnBwAoKyvD5s2bsXbtWowYMQIhISF4//33cfToUeHmjPv378eFCxfw8ccfo1+/fhgzZgxWrlyJjRs3oqaGVtXVarVi+vTp6NGjh8Mo0LVr1zBmzBi4u7ujW7du+Oyzz6TfCIwxxpiT2G12SaamrtE7MTabDfv27cODDz6IyMhI+Pn5YcCAAQ7noeTm5sJsNiM8PFx4rkePHujUqRNycnIAADk5OejTpw/8/f2FmMjISBiNRpw/f160HSaTCRMnTkReXh4OHz6MTp06CfMWL16MmJgYfPfdd4iLi0NsbCzy8/MlWHvGGGOMNZZG78QUFxejvLwcqampGD16NPbv348nn3wSEyZMwKFDhwAABoMBrq6u8Pb2dnitv78/DAaDEPP7Dkzt/Np5d1JeXo6oqChcv34dX3/9Ndq3b+8wf+LEiXj++efx4IMPYuXKlQgNDcWGDRsastqMMcbYvdNKTuyV9JyYjIwM/PWvfxUef/HFF7j//vsBAE888QTmzZsHAOjXrx+OHj2K9PR0DBs2TMom1Gvy5Mno0KEDDh48CHf3ujeg0+l0dR7f7oomk8kEk8nxZoMWuxVKGe24KmOMMdbo+JyYuxcdHY28vDxhCg0NRbt27aBUKtGzZ0+H2ODgYOG8FK1Wi5qaGpSWljrEFBUVQavVCjF/vFqp9nFtzO2MHTsWZ86cEQ5NNURKSgo0Go3DdKzsQoPzMsYYY+zuSNqJadOmDYKCgoTJ3d0drq6ueOSRR1BQUOAQe+nSJXTu3BkAEBISAhcXFxw4cECYX1BQgMLCQmGURKfT4ezZsyguLhZi9Ho91Gp1nQ7SH82aNQupqamIjo4WDmH9Xu3Jw79/HBwcXG+u5ORklJWVOUwDNXdePmOMMeZMreXEXqdcYp2UlIS//OUvGDp0KIYPH47MzEzs2bMHWVlZAACNRoP4+HjMnz8fPj4+UKvVmDNnDnQ6HQYO
HAgAiIiIQM+ePTFlyhSsWrUKBoMBixYtQkJCAlQqlWgb5syZA6vViscffxxffPEFwsLChHk7duxAaGgowsLCkJGRgRMnTmDz5s315lGpVHWWx4eSGGOMNSmt5HCSUzoxTz75JNLT05GSkoIXX3wR3bt3x7///W+HjkRaWhrkcjliYmJgMpkQGRmJf/7zn8J8hUKBvXv3YtasWdDpdPD09MS0adOwYsUKcjvmzp0Lm82GsWPHIjMzE4MG3arfsXz5cmzbtg2zZ89GQEAAPvnkE9HRHcYYY6ypag6jKFKQ2e321rGmjWhJlzhSnAtxS7vbxYtRqYi53IhxlHxqK61r72qnxXnJxAufqeS04mht3E3iQQA82ojXFHJvS6s7pNISi/p1v/M5WwAgD6Od4G7PPU6Ks14TL4pnr6BtM5m7KynOXiW+3SoLaNvWeN1NPAhAdZWLaExVjXgMABittLhK4shrNaGoXCWxcl4V8cA/5dNSQcxVQyypWSMTj6N9ioEKYiSlwJ6N2P5KO22ZlGzVxKJ+n/28mxTXECVPSnPRjM/OuqdgNCV8A0jGGGOspeHDSYwxxhhrjogD4s1ei76LNWOMMcZaLh6JYYwxxlqaVjISw50YxhhjrIXhw0mMMcYYY01Yi+7EZGVlQSaT1bmdQa3Lly9DJpPd9j5JjDHGWLNkk2hq4pzeiXnhhRcgk8nw5ptvOjxfUlKCuLg4qNVqeHt7Iz4+HuXl5Q4xZ86cwZAhQ+Dm5oaOHTti1apVTmw5Y4wx1jzYbdJMTZ1Tz4nZuXMnjh07hsDAwDrz4uLicO3aNej1epjNZjz33HOYOXMmtm7dCgAwGo2IiIhAeHg40tPTcfbsWUyfPh3e3t6YOXOmM1fjT1OCVthKSlK+B6m5qD1jC6WoHzGX1UZbqtUiHmepphUzUxppRbLkv4gXnpMRi9jJB40gxeH0EdEQe/GvtFwutK8Je3mleCoNrcCeW6WZFCclO60OHxRWWhE1F7v4e00O2nvNhfBZAWhF8aiFXKnfV5Q1oO5NObGQYBXh24haYE9OKEoIgLY1uHSs0zltJObKlSuYM2cOMjIy4OLiWBkzPz8fmZmZePfddzFgwACEhYVhw4YN2LZtG65evQoAyMjIQE1NDd577z306tULsbGxePHFF7F27VpyGyorKzFmzBgMHjzY4RDTxYsXMWjQILi5uaF379713iSSMcYYay5ay0iMUzoxNpsNU6ZMQVJSEnr16lVnfk5ODry9vREaGio8Fx4eDrlcjuPHjwsxQ4cOhavrbyXQIyMjUVBQgBs3boi2obS0FKNGjYLNZoNer4e3t7cwLykpCQsWLMC3334LnU6HcePG4ddfib9SGWOMsSaGOzESeuONN6BUKvHiiy/WO99gMMDPz8/hOaVSCR8fHxgMBiHG39/fIab2cW3M7RgMBgwbNgwBAQHYs2cPPDw8HOYnJiYiJiYGwcHB2LRpEzQazW3vYs0YY4w1eXaZNFMTJ+k5MRkZGfjrX/8qPP7iiy/g4eGBdevW4fTp05ARjz1KbdSoUXj00Uexfft2KBR1j7nqdDrhb6VSidDQUOTn59eby2QywWRyPK5vsVuhJB7LZYwxxpg0JB2JiY6ORl5enjCFhobi8OHDKC4uRqdOnaBUKqFUKvHzzz9jwYIF6NKlCwBAq9WiuLjYIZfFYkFJSQm0Wq0QU1RU5BBT+7g25naioqKQnZ2NCxcuNHgdU1JSoNFoHKYjZecbnJcxxhiTCh9O+hPatGmDoKAgYXJ3d8eUKVNw5swZh85NYGAgkpKS8OWXXwK4NRJSWlqK3NxcIdfBgwdhs9kwYMAAISY7Oxtm82/nuev1enTv3h1t27a9Y7tSU1Mxbdo0jBw5st6OzLFjx4S/LRYLcnNzERwcXG+u5ORklJWVOUyDNXXP82GMMcbuFbtNJsnU1DX6Jda+vr7w9fV1eM7FxQVarRbdu3cHAAQHB2P06NGYMWMG0tPTYTabkZiY
iNjYWOFy7KeffhrLly9HfHw8Fi5ciHPnzmHdunVIS0sjtWP16tWwWq0YMWIEsrKy0KNHD2Hexo0b8cADDyA4OBhpaWm4ceMGpk+fXm8elUoFlcrxwl8+lMQYY4w5X5O5d1JGRgYSExMxcuRIyOVyxMTEYP369cJ8jUaD/fv3IyEhASEhIWjXrh2WLFlyVzVi0tLSHDoytVc6paamIjU1FXl5eQgKCsLu3bvRrl07ydeRMcYYc4bmcChICjK73c7leRpoRec4UpwrsXiUO2GPqIh7TUGM8yC84VXEt4qHjVZmSkWoDOWpoJXJUilpy3R3E8/noaYVZFO1sZDiXNuJ73dlgBcpl+L+DqQ4+aBRojH2MzmkXPbqalpcqVE0xnzuF1Iuk4H2DVxTLv47rKrcRTQGAKqraHFVNcQ4q/gIbSWx2F21jHbkv0ouHldDPEJQQTzZwErIR/ukAJUy2ndMNSHOQqw8V0OMo7wjzcSSoP+8/CkpriGu6IiFMUXcl3NQkjyNpUXfO4kxxhhjLVeTOZzEGGOMMWm0lsNJ3IlhjDHGWpjmcGWRFPhwEmOMMcaaJR6JYYwxxlqY1nLJDndiGGOMsRaGDye1AFlZWZDJZCgtLa13/uXLlyGTyZCXl+fUdjHGGGONqbVU7G30TozZbMbChQvRp08feHp6IjAwEFOnTsXVq1cd4kpKShAXFwe1Wg1vb2/Ex8ejvLzcIebMmTMYMmQI3Nzc0LFjR6xataqxm88YY4yxJqrRDydVVlbi9OnTWLx4Mfr27YsbN27gpZdeQnR0NE6dOiXExcXF4dq1a9Dr9TCbzXjuuecwc+ZMbN26FQBgNBoRERGB8PBwpKen4+zZs5g+fTq8vb3vqmpvY6AWsVMSj1FSrowj1oQiL5PChXiQldozlhGLTElJJhdfJvnXB/E29fYawjIraAX27MW/0uIIhezkj4gXxAMA2+WzpDjgsvgy1a6kTMpyWoE92MWLHNoo1djugo2430loNRrpn2PCl4dSRmu/jPi9ZiaE0XcBLZDyHWOlfi9Tv0wJTMRlOgOfEyMRjUYDvV7v8Nxbb72FRx99FIWFhejUqRPy8/ORmZmJkydPIjQ0FACwYcMGjB07FqtXr0ZgYCAyMjJQU1OD9957D66urujVqxfy8vKwdu1aciemsrISMTExMBqN2Ldvn/D8xYsXMXv2bJw+fRpBQUHYuHEjhg0bJt1GYIwxxpyoORwKksI9OSemrKwMMpkM3t7eAICcnBx4e3sLHRgACA8Ph1wux/Hjx4WYoUOHCvc7AoDIyEgUFBTgxo0bosssLS3FqFGjYLPZoNfrhWUDQFJSEhYsWIBvv/0WOp0O48aNw6+/0n7xMsYYY+zecHonprq6GgsXLsTkyZOhVqsBAAaDAX5+fg5xSqUSPj4+MBgMQoy/v79DTO3j2pjbMRgMGDZsGAICArBnzx54eHg4zE9MTERMTAyCg4OxadMmaDQabN68uUHryRhjjN0rdrtMkqmpk7QTk5GRAS8vL2E6fPiww3yz2YxJkybBbrdj06ZNUi76jkaNGoWgoCBs377dYSSnlk6nE/5WKpUIDQ1Ffn5+vblMJhOMRqPDZCEcl2eMMcacxW6TZmrqJO3EREdHIy8vT5h+f3iotgPz888/Q6/XC6MwAKDValFcXOyQy2KxoKSkBFqtVogpKipyiKl9XBtzO1FRUcjOzsaFCxcatH4AkJKSAo1G4zBllZ1vcF7GGGOM3R1JOzFt2rRBUFCQMLm7uwP4rQPz/fff46uvvoKvr6/D63Q6HUpLS5Gbmys8d/DgQdhsNgwYMECIyc7OhtlsFmL0ej26d++Otm3b3rFdqampmDZtGkaOHFlvR+bYsWPC3xaLBbm5uQgODq43V3JyMsrKyhymxzS9RLYMY4wx5jw2u0yS6W6kpKTgkUceQZs2beDn54fx48ejoKCgkdbwFqfUiXnqqadw6tQpZGRkwGq1wmAwwGAwoKamBgAQHByM
0aNHY8aMGThx4gSOHDmCxMRExMbGIjAwEADw9NNPw9XVFfHx8Th//jy2b9+OdevWYf78+aR2rF69GnFxcRgxYgQuXrzoMG/jxo3YuXMnLl68iISEBNy4cQPTp0+vN49KpYJarXaYlDJFA7YQY4wxJq17cU7MoUOHkJCQgGPHjgnlUiIiIlBRUdFIa+mES6yvXLmC3bt3AwD69evnMO/rr7/GY489BuDW+TSJiYkYOXIk5HI5YmJisH79eiFWo9Fg//79SEhIQEhICNq1a4clS5bcVY2YtLQ0WK1WjBgxAllZWcL5MampqUhNTUVeXh6CgoKwe/dutGvXrmErzhhjjLUimZmZDo+3bNkCPz8/5ObmYujQoY2yTJnd3lpK4jSef3V4hhRH7TF62MR3iQtxr8mJu9eNcAaXG6kMH+Amp53o7KoQj1O50HK5u9eQ4lzcCMv0stBytaFtW4W3+EidQqMi5ZK305DiZO19xGM6dyPlUg4YR4qznPpCNMZ2/JhoDABYr5WQ4mzlZtEYSyntfVtTRhuYrqmkfZJrTOL73VTtQsplMtNGe6ss4m2rttNymYmF26pl4tvNSiywVyGXrsAeJQYATMQ4yjcRtTTLyz9/TAtsgIsPjpUkT9ezO2EyORbjVKlUUKnEv7N++OEHPPDAAzh79ix69+4tSXv+qEXfO4kxxhhrjex2aab6LmZJSUkRXb7NZsPcuXMxePDgRuvAAHwXa8YYY6zFkapib3Jycp1zTymjMAkJCTh37hy++eYbSdpxO9yJYYwxxli9qIeOfi8xMRF79+5FdnY2OnTo0Egtu4U7MYwxxlgLI+mNSonsdjvmzJmDnTt3IisrC127dm30ZXInhjHGGGth7sUtAxISErB161b8z//8D9q0aSPcEkij0Qh146TGJ/YyxhhjrME2bdp0qwDsY48hICBAmLZv395oy2zRIzHPPvssSktLsWvXrnrnL1u2DLt27UJeXp5T28UYY4w1pntRPOVeVGxpViMxGzduRJcuXeDm5oYBAwbgxIkT97pJjDHGWJNzL247cC80m5GY7du3Y/78+UhPT8eAAQPw5ptvIjIyEgUFBfDz87vXzSOREzupVkKRKTloyaTcwXZi8Ssr8Y1POWZL7dhbLLQCXgqreOEzq5nWt5dX0wrxySrF42Su4kXbAEBWXkmKg5KyPX4kpbLI99EWOXC8eC5SJkD24yVSnP1GmWiMsvQmKZfLDdq2dSulFVa0lovH1JTTcpmraO9vU7X4J95cQyycZ6IV4qu2iuerttGW6WGjffaq5OJx1DEBaoE9KyGsqun/n9/iNJuRmLVr12LGjBl47rnn0LNnT6Snp8PDwwPvvfceOcfJkyfRvn17vPHGGw7P/+tf/0LHjh3h4eGBSZMmoaxM/IuRMcYYa6ruxb2T7oVm0YmpqalBbm4uwsPDhefkcjnCw8ORk5NDynHw4EGMGjUKr732GhYuXCg8/8MPP+DTTz/Fnj17kJmZiW+//RazZ8+WfB0YY4wxZ5GqYm9T1yw6Mf/9739htVrh7+/v8Ly/v79wCded7Ny5E0888QT+9a9/1blhZHV1NT788EP069cPQ4cOxYYNG7Bt2zZSXsYYY4zdO83mnJg/6/jx49i7dy8+++wzjB8/vs78Tp064b777hMe63Q62Gw2FBQUQKvV1ok3mUx1boZltlvhIqMd82WMMcYaW3M4KVcKzWIkpl27dlAoFCgqKnJ4vqioqN6Oxu/df//96NGjB9577z2YzbSTJ++kvpthZd483+C8jDHGmFT4nJgmxNXVFSEhIThw4IDwnM1mw4EDB6DT6e742nbt2uHgwYP44YcfMGnSpDodmcLCQly9elV4fOzYMcjlcnTv3r3efMnJySgrK3OYRrfp1YC1Y4wxxqTVWi6xbhadGACYP38+3nnnHXzwwQfIz8/HrFmzUFFRgeeee070tX5+fjh48CAuXryIyZMnw2L57UJPNzc3TJs2Dd999x0O
Hz6MF198EZMmTbrtCI9KpYJarXaY+FASY4wx5nzN5pyYv/zlL7h+/TqWLFkCg8GAfv36ITMzs87Jvrej1Wpx8OBBPPbYY4iLi8PWrVsBAEFBQZgwYQLGjh2LkpISPP744/jnP//ZmKvCGGOMNapmcGGRJGT2e1EnuIVJ6fwMKc6FuKUpA3gqYi7qMin53G20ZC7EtxRl/MrdLl6cDgDcZLTCcyq5eJw7sfCcyo1Wus1VJR6n8qTlUrWlbQ+FWnyQVa52peXyb0uKkw8YKBqjDB1DymU5vocUZ/9ZvGCf/XoJKZetxEiKs/5aRYsziu8rSwVtuN5UThvtrakU/11aY6L9dq2uIcZZCMskFrGrJH0rADUy8XxmGW3b1hDjKgmrYCYefZn9y8e0wAY4GhAjSZ5B1/4tSZ7G0mwOJzHGGGOM/V6zOZzEGGOMMZrmcGWRFLgTwxhjjLUwtAPPzR8fTmKMMcZYs8QjMYwxxlgLYyddItL8cSeGMcYYa2GIF5M2ey36cNLly5chk8mQl5d32xiZTIZdu3Y5rU2MMcYYk4ZTOjEpKSl45JFH0KZNG/j5+WH8+PEoKChwiKmurkZCQgJ8fX3h5eWFmJiYOvdKKiwsRFRUFDw8PODn54ekpCSH6ruMMcYYA2yQSTI1dU45nHTo0CEkJCTgkUcegcViwauvvoqIiAhcuHABnp6eAIB58+Zh37592LFjBzQaDRITEzFhwgQcOXIEAGC1WhEVFQWtVoujR4/i2rVrmDp1KlxcXPD66687YzVuq1hGLHpGLKpE4W6n9T9diPk8CJfjlSlo7XchXtrnQTh9vpy4np7EYlouVvG3vHsV7WPhUUPb724u4nFuVbRcHjUm8SAAqgrxon7K8mpSLthoxeJkl38QjbFYaUXs5N36kuJs1w2iMXbrdVIuexWtyKG1nHbdh/mm+OfAVE4sKFdJ+ySbqsXzVdXQclVZaYXnqgkF6qoJxekAoEpOLP5H+C41Eb9ua4hxlHwmWdM5htNazolxykhMZmYmnn32WfTq1Qt9+/bFli1bUFhYiNzcXABAWVkZNm/ejLVr12LEiBEICQnB+++/j6NHj+LYsWMAgP379+PChQv4+OOP0a9fP4wZMwYrV67Exo0bUVNTQ2qH1WrF9OnT0aNHDxQWFgrPX7t2DWPGjIG7uzu6deuGzz77TPqNwBhjjDmJTaKpqbsn58SUlZUBAHx8fAAAubm5MJvNCA8PF2J69OiBTp06IScnBwCQk5ODPn36ONwrKTIyEkajEefPnxddpslkwsSJE5GXl4fDhw+jU6dOwrzFixcjJiYG3333HeLi4hAbG4v8/HxJ1pUxxhhjjcPpnRibzYa5c+di8ODB6N27NwDAYDDA1dUV3t7eDrH+/v4wGAxCzB9v9lj7uDbmdsrLyxEVFYXr16/j66+/Rvv27R3mT5w4Ec8//zwefPBBrFy5EqGhodiwYUNDVpMxxhi7Z+yQSTI1dU6/xDohIQHnzp3DN99847RlTp48GR06dMDBgwfh7u5eZ75Op6vz+HZXNJlMJphMjuclWOxWKGW048eMMcZYY2sOh4Kk4NSRmMTEROzduxdff/01OnToIDyv1WpRU1OD0tJSh/iioiJotVoh5o9XK9U+ro25nbFjx+LMmTPCoamGSElJgUajcZhOlvGhJ8YYY8zZnNKJsdvtSExMxM6dO3Hw4EF07drVYX5ISAhcXFxw4MAB4bmCggIUFhYKoyQ6nQ5nz55FcXGxEKPX66FWq9GzZ887Ln/WrFlITU1FdHQ0Dh06VGd+7cnDv38cHBxcb67k5GSUlZU5TI9o6o9ljDHG7oXWcmKvUw4nJSQkYOvWrfif//kftGnTRjiHRaPRwN3dHRqNBvHx8Zg/fz58fHygVqsxZ84c6HQ6DBw4EAAQERGBnj17YsqUKVi1ahUMBgMWLVqEhIQEqFQq0TbMmTMHVqsVjz/+OL744guEhYUJ83bs2IHQ0FCEhYUhIyMD
J06cwObNm+vNo1Kp6iyPDyUxxhhrSprD+SxScEonZtOmTQCAxx57zOH5999/H88++ywAIC0tDXK5HDExMTCZTIiMjMQ///lPIVahUGDv3r2YNWsWdDodPD09MW3aNKxYsYLcjrlz58Jms2Hs2LHIzMzEoEGDAADLly/Htm3bMHv2bAQEBOCTTz4RHd1hjDHG2L0ls9vtTac6TzM1r0ssKc5Vwp7xvSh2Rz326EJ8R1GK3VFzedpoA58uhLe7O8QLxQGAh0LCYncqYrE7NbHYnReh2F0b2sZ10YqPdAKAst+DojGyjl1IucjF7o5/KR7z48+kXFZDKSnOXEyrS2Uu42J3QgwXu3Pwt58zaAttgD3ayZLkGWf4RJI8jYVvACkBM2hvXBPxCKMbobtgldFyqSTsOFGPj3oQl0kpsqsidmLsxC6WG6XPTl1RWl8HNkoHkfjlJ6+g/QdEKw5NWwGFF62Srf1GKSHqMimX7ddi8SAAsod04jHl5bRcZRWkOLkrbXvIFOL7VCZ3/m9I6hKpJecpHxcLsVq5lGXuzRJ2Tm7FiW85ai5naA63DJBCi74BJGOMMcZaLh6JYYwxxlqY1nKeCHdiGGOMsRamOVweLQXuxDDGGGMtjI14HlJzx+fEMMYYY6xZatGdmMuXL0Mmk932PkgAIJPJsGvXLqe1iTHGGGtsdommps7pnZjU1FTIZDLMnTvX4fnq6mokJCTA19cXXl5eiImJqXOvpMLCQkRFRcHDwwN+fn5ISkqCxUKrr8EYY4y1Fq3ltgNO7cScPHkS//rXv/DQQw/VmTdv3jzs2bMHO3bswKFDh3D16lVMmDBBmG+1WhEVFYWamhocPXoUH3zwAbZs2YIlS5Y4cxUYY4wx1kQ47cTe8vJyxMXF4Z133sHf//53h3llZWXYvHkztm7dihEjRgC4dUuC4OBgHDt2DAMHDsT+/ftx4cIFfPXVV/D390e/fv2wcuVKLFy4EMuWLYOrq6toG6xWK2bMmIGjR49i//796NSpEwDg2rVrGDNmDLKyshAQEIBVq1bhqaeeIq9bqZ1W/MqVWLWymtD/pRTEAwArMa6KcA6Yu8R9Xsq9PazEc9Ooha1sNsIyietpIRSxAwA7YZmKGtrArZTn6tmIG1fpSXt/K0tvigdZaAX27OYi8SAAMqNRNEb+8GBSLlTTqiHbq/6PFKesFq/sazXRfusqlbQ4avVZ0jKJBRilPOZATUX9vEtJTioe13QOwBC+dloEp43EJCQkICoqCuHh4XXm5ebmwmw2O8zr0aMHOnXqhJycHABATk4O+vTpA39/fyEmMjISRqMR58+fF12+yWTCxIkTkZeXh8OHDwsdGABYvHgxYmJi8N133yEuLg6xsbHIz89vyOoyxhhj94wNMkmmps4pIzHbtm3D6dOncfLkyXrnGwwGuLq6wtvb2+F5f39/4Y7XBoPBoQNTO7923p2Ul5cjKioKJpMJX3/9NTQajcP8iRMn4vnnnwcArFy5Enq9Hhs2bHC4ASVjjDHGmpZG78T88ssveOmll6DX6+Hm5tbYi6vX5MmT0aFDBxw8eBDu7u515ut0ujqPb3dFk8lkgsnkOOxstVuhkNFulsYYY4w1tqZzYKtxNfrhpNzcXBQXF+Phhx+GUqmEUqnEoUOHsH79eiiVSlitVmi1WtTU1KC0tNThtUVFRdBqtQAArVZb52ql2se1MbczduxYnDlzRjg01RApKSnQaDQO0/mySw3OyxhjjEnFJpNmauoavRMzcuRInD17Fnl5ecIUGhqKuLg45OXlQaFQICQkBC4uLjhw4IDwuoKCAhQWFgqjJDqdDmfPnkVx8W93t9Xr9VCr1ejZs+cd2zBr1iykpqYiOjoahw4dqjP/2LFjdR4HBwfXmys5ORllZWUOUy/Ng+TtwRhjjDFpNPrhpDZt2qB3794Oz3l6esLX11d4XqPRID4+HvPnz4ePjw/UajXmzJkDnU6HgQMHAgAiIiLQs2dPTJky
BatWrYLBYMCiRYuQkJAAlUol2o45c+bAarXi8ccfxxdffIGwsDBh3o4dOxAaGoqwsDBkZGTgxIkT2Lx5c715VCpVneXxoSTGGGNNSXOo8SKFJnPvpLS0NMjlcsTExMBkMiEyMtLhxFqFQoG9e/di1qxZ0Ol08PT0xLRp07BixQryMubOnQubzYaxY8ciMzMTgwYNAgAsX74c27Ztw+zZsxEQEIBPPvlEdHSHMcYYa6payzkxMrvd3lrWtdE803mCeBDodWIoqHViVMQ4yqFPap0YN2L9FHdCnAspE+BCfBd7EX6eqGy0ZG7Ej46XXbw2ipeCVovFQ0WLU7mJx6ncadWuvQKIywz2Fo2ReXmQcsFMa5vMz1c0hlonxnb0ICnOcolWJ8ZsEK8TY7pBrONkFK+DBQCVFeJxFSbap6rGRhthLreLx1XKabmqiYWQqgibzUQ8n6NawlpUVcTaOisuZ9AW2gCbOzwjSZ74//tYkjyNpcmMxDRnlaB94VrttC8sBaGzYCV+WKg9VBdCN8ZEzEY9GcxEWAdKRwcA3Kj1DAi7wErMRS12Jyd0imxW2n8sdlo9Nkj508S1jFagzqWsSjRGVkMsdlcl3gEAANlN8WWimtY5kfV/hBSnqKomxdkq71z6AQAUlcRid6607aaoEs+noBaxI6J8q8mpb0gJqzlSC2VSl2ghfP9ZW834R9PBnRjGGGOsheFzYhhjjDHWLLWWTozT72LNGGOMMSYFHolhjDHGWhjiKXvNHo/EMMYYYy2MTaLpz9i4cSO6dOkCNzc3DBgwACdOnGjIqtxRi+/EyGQy7Nq167bzu3TpgjfffNNp7WGMMcZaqu3bt2P+/PlYunQpTp8+jb59+yIyMtKh2r6UnNaJuXLlCp555hn4+vrC3d0dffr0walTp4T5drsdS5YsQUBAANzd3REeHo7vv//eIUdJSQni4uKgVqvh7e2N+Ph4lJeXO2sVGGOMsWbhXo3ErF27FjNmzMBzzz2Hnj17Ij09HR4eHnjvvfcaukr1ckon5saNGxg8eDBcXFzwxRdf4MKFC1izZg3atm0rxKxatQrr169Heno6jh8/Dk9PT0RGRqK6+reaDHFxcTh//jz0ej327t2L7OxszJw50xmrwBhjjDUbdokmk8kEo9HoMJlM9ResqqmpQW5uLsLDw4Xn5HI5wsPDJbkBc32ccmLvG2+8gY4dO+L9998Xnuvatavwt91ux5tvvolFixbhiSeeAAB8+OGH8Pf3x65duxAbG4v8/HxkZmbi5MmTCA0NBQBs2LABY8eOxerVqxEYGEhqy9KlS/H222/jyy+/xEMPPQQAuHnzJiZPnozdu3fD29sbr776KhISEsjrV2WjVTSFnFbQjBJlpdZUkq4GHNxBq7rpRYyjNK5S4sJcpA1C7NoTC/tCTileSKwQqCDueBmhVpyNeOYftbKvW6n4QuXVtKJtViOxYq+r+DLtFbQKgdQidvJQWlE8ZdU3ojG2yl9JuSzV0hXFU5ppuajvNQobsYidlHdNVhCbX0n8vFPeuS3xsuaUlBQsX77c4bmlS5di2bJldWL/+9//wmq1wt/f3+F5f39/XLx4sVHa55SRmN27dyM0NBQTJ06En58f+vfvj3feeUeY/9NPP8FgMDj03jQaDQYMGCD03nJycuDt7S10YAAgPDwccrkcx48fF22D3W7HnDlz8OGHH+Lw4cNCBwYA/vGPf6Bv37749ttv8corr+Cll16CXq+XYtUZY4wxp7PJpJmSk5NRVlbmMCUnJ9/r1RM4ZSTmxx9/xKZNmzB//ny8+uqrOHnyJF588UW4urpi2rRpMBhuleeur/dWO89gMMDPz8+x8UolfHx8hJjbsVgseOaZZ/Dtt9/im2++wX333ecwf/DgwXjllVcAAA8++CCOHDmCtLQ0jBo1qkHrzRhjjN0LUo0KqVQqqFQqUmy7du2gUChQVFTk8HxRURG0Wq1ELXLklJEYm82Ghx9+GK+//jr6
9++PmTNnYsaMGUhPT3fG4jFv3jwcP34c2dnZdTowAKDT6eo8zs/PrzdXfccHbfaWOIjIGGOsuboXJ/a6uroiJCQEBw4c+K0dNhsOHDhQ5/9ZqTilExMQEICePXs6PBccHIzCwkIAEHpod+q9abXaOpdoWSwWlJSUiPbwRo0ahStXruDLL79s0HoAt44PajQah+k/xv80OC9jjDHW3M2fPx/vvPMOPvjgA+Tn52PWrFmoqKjAc8891yjLc0onZvDgwSgoKHB47tKlS+jcuTOAWyf5arVah96b0WjE8ePHhd6bTqdDaWkpcnNzhZiDBw/CZrNhwIABd1x+dHQ0tm7diueffx7btm2rM//YsWN1HgcHB9ebq77jg/er77/j8hljjDFnkurqpLv1l7/8BatXr8aSJUvQr18/5OXlITMzs87pIlJxyjkx8+bNw6BBg/D6669j0qRJOHHiBN5++228/fbbAG4VpJs7dy7+/ve/44EHHkDXrl2xePFiBAYGYvz48QBujdyMHj1aOAxlNpuRmJiI2NhY0pVJTz75JD766CNMmTIFSqUSTz31lDDvyJEjWLVqFcaPHw+9Xo8dO3Zg37599eap7/igXNbiawYyxhhrRqS80utuJSYmIjEx0SnLckon5pFHHsHOnTuRnJyMFStWoGvXrnjzzTcRFxcnxLz88suoqKjAzJkzUVpairCwMGRmZsLNzU2IycjIQGJiIkaOHAm5XI6YmBisX7+e3I6nnnoKNpsNU6ZMgVwux4QJEwAACxYswKlTp7B8+XKo1WqsXbsWkZGR0m0AxhhjjElOZrfbpS7E0eqM6TiGFOdOrhMjPrJDHftxldFqttyLOjEKQs0W6np6EGueuBHi3ImfCBUxzoNQUMadWHRGbafVT/FUiMepXGi52vpWkuLU94vnk7vR3hv0OjHi+1Ohpn3uFB3bkeKodWKsh8XrxNRcpNWJqSqmbbeKUlfxmAralSaVNbTtdtMu/lu4XE5rf6Wc9jk2EcIoMQBwk/glYyIcXDER61qlXN5KW2gDpHZ+RpI8r/z8sSR5GgvfxVoCVuKRw2rif0A1hP/c3YidEwuxj+pG6HhUyWiFyqjkhPV0JVbrUxC3B+Uor5lYmMuLeOq+nJCP+N2NKjuxU0ooVEb99WKqon1NWI3iRR9t1bTPgOUmbYPICBXNbNWEyn8AbJV3LtVQS1lxmBSnGBAiGuNiEq9xBQDWSiMpzmIS3741Jtr+VFpo7zUXwntNRbyC00R9f0tXsxIuxA+CuZndFbq1jE7wyRyMMcYYa5Z4JIYxxhhrYWytZCyGOzGMMcZYC9NaSrDy4STGGGOMNUs8EsMYY4y1MK3jYFIrGImRyWTYtWvXbed36dIFb775ptPawxhjjDW2e3HvpHvBKZ0Yq9WKxYsXo2vXrnB3d8f999+PlStX4vclaux2O5YsWYKAgAC4u7sjPDwc33//vUOekpISxMXFQa1Ww9vbG/Hx8SgvL3fGKjDGGGPNhk0mzdTUOaUT88Ybb2DTpk146623kJ+fjzfeeAOrVq3Chg0bhJhVq1Zh/fr1SE9Px/Hjx+Hp6YnIyEhUV1cLMXFxcTh//jz0ej327t2L7OxszJw50xmrwBhjjLEmxinnxBw9ehRPPPEEoqKiANw6hPPJJ5/gxIkTAG6Nwrz55ptYtGgRnnjiCQDAhx9+CH9/f+zatQuxsbHIz89HZmYmTp48idDQUADAhg0bMHbsWKxevZp0/yQAWLp0Kd5++218+eWXeOihhwAAN2/exOTJk7F79254e3vj1VdfRUJCAnn9qmziRb4AQEm8x5KCEGclFo+iVuxVUgq82WntryAWxXMhFLKzEvvZ5OO/hO2hIhYINBGL4slJq0As6kf93WET/2jLiWPFlhpiYcUq8XWQ1RC3bTltmZRdoDTRVlRRSYuzVZeQ4lzM4oXslI/0I+VSVZ0gxVlNJtEYUxWt4KCZWOzO1Sb+nqwmfncoiZ89BaVCt8Sj
CM1gUMJBa7nE2ikjMYMGDcKBAwdw6dIlAMB3332Hb775BmPG3CrX/9NPP8FgMCA8PFx4jUajwYABA5CTkwMAyMnJgbe3t9CBAYDw8HDI5XIcPy7+ZWG32zFnzhx8+OGHOHz4sNCBAYB//OMf6Nu3L7799lu88soreOmll6DX6yVZd8YYY8zZ7tVdrJ3NKSMxr7zyCoxGI3r06AGFQgGr1YrXXntNuAGkwXCr3Pcfb9Xt7+8vzDMYDPDz83OYr1Qq4ePjI8TcjsViwTPPPINvv/0W33zzDe677z6H+YMHD8Yrr7wCAHjwwQdx5MgRpKWlYdSoUX9+pRljjDHWqJzSifn000+RkZGBrVu3olevXsjLy8PcuXMRGBiIadOmNfry582bB5VKhWPHjqFdu7o3edPpdHUe3+6KJZPJBNMfhmxtdhvkxENFjDHGWGNrDlcWScEp//MmJSXhlVdeQWxsLPr06YMpU6Zg3rx5SElJAQBotVoAQFFRkcPrioqKhHlarRbFxcUO8y0WC0pKSoSY2xk1ahSuXLmCL7/8ssHrkpKSAo1G4zD9cvNyg/MyxhhjUrHBLsnU1DmlE1NZWQn5H85uVCgUsNlu9RW7du0KrVaLAwcOCPONRiOOHz8ujJLodDqUlpYiNzdXiDl48CBsNhsGDBhwx+VHR0dj69ateP7557Ft27Y6848dO1bncXBwcL25kpOTUVZW5jB1bNPljstnjDHGmPSccjhp3LhxeO2119CpUyf06tUL3377LdauXYvp06cDuFWQbu7cufj73/+OBx54AF27dsXixYsRGBiI8ePHAwCCg4MxevRozJgxA+np6TCbzUhMTERsbCzpyqQnn3wSH330EaZMmQKlUomnnnpKmHfkyBGsWrUK48ePh16vx44dO7Bv375686hUKqhUKofn+FASY4yxpqTpj6FIwymdmA0bNmDx4sWYPXs2iouLERgYiL/+9a9YsmSJEPPyyy+joqICM2fORGlpKcLCwpCZmQk3NzchJiMjA4mJiRg5ciTkcjliYmKwfv16cjueeuop2Gw2TJkyBXK5HBMmTAAALFiwAKdOncLy5cuhVquxdu1aREZGSrcBGGOMMSdqLefEyOx24oX57LaG3DeSFCdlnRgX4pFAap0YD5l4f1ZJXKacWD+FUieGup5uxLg2oNSJIaWCG6VWBQB3Qj534jdOGxutce428YRtZLR6Ie08K0lxPvdViMbIFMQ6MTdpv69IdWJUxDoxxDilmhQGl87igdQ6MeZsWp2YynzxOjHG626iMQBQUaESDwJQXuMivky7eAwAlNOKKqFKLr7jK4kD5JXEAjBVMvH3biUhBgBSLm+lLbQB5neJlSTP2st1T8FoSvgGkBKgFkGyEAvU2QkDgVZiP9tMjKNEKYlr6kboKACAnNDBkhEHRc3EuGrCmlqpReyI24PUMuIXrgux40QpXmgiFiAzmWlfE6ZyQoE9YiemuoL2nx6FsprYiXEhdmKoRfEqjaIxdmIRO5eBfUlx7pW5ojHmKlpxTmqRQxOhKJ6blfh9RXx/U+MoqJko33/yVnMQp+ngTgxjjDHWwrSW7hR3YhhjjLEWprWcE8OX1TDGGGOsWeKRGMYYY6yFoZxb2RJwJ4YxxhhrYfhwUguQlZUFmUyG0tLSeudfvnwZMpkMeXl5Tm0XY4wxxhquwZ2Y7OxsjBs3DoGBgZDJZNi1a1edGLvdjiVLliAgIADu7u4IDw/H999/7xBTUlKCuLg4qNVqeHt7Iz4+HuXl5Q4xZ86cwZAhQ+Dm5oaOHTti1apVDW0+Y4wx1uLwvZOIKioq0LdvX2zcuPG2MatWrcL69euRnp6O48ePw9PTE5GRkaiurhZi4uLicP78eej1euzduxfZ2dmYOXOmMN9oNCIiIgKdO3dGbm4u/vGPf2DZsmV4++23G7oKjDHGWItil2hq6hp8TsyYMWMwZsyY28632+148803sWjRIjzxxBMAgA8//BD+/v7Y
tWsXYmNjkZ+fj8zMTJw8eRKhoaEAbt2qYOzYsVi9ejUCAwORkZGBmpoavPfee3B1dUWvXr2Ql5eHtWvXOnR27qSyshIxMTEwGo0O90a6ePEiZs+ejdOnTyMoKAgbN27EsGHDyNug3CZeJbN2W1D4uniJxlAr2VKPi9bYraIxSkJV37thohT/k66mFZkrcaEKYlE8yj5QEJdpJm6PCkrlU+Kbw5Na7K6KUPVZSVuoqVq6YncWOW2Z1EJ8ymrxzwoAmKsIReCqaN8d7tXiRewAwGVQT9EYr+qzpFzmalpRvBpCsTuzjVitnPi95kL4LlURC+LR9iZgJaSjLpNJp9HPifnpp59gMBgQHh4uPKfRaDBgwADk5OQAAHJycuDt7S10YAAgPDwccrkcx48fF2KGDh0KV1dXISYyMhIFBQW4ceOGaDtKS0sxatQo2Gw26PV6eHt7C/OSkpKwYMECfPvtt9DpdBg3bhx+/fXXhq46Y4wxdk/w4SSJGAwGAIC/v7/D8/7+/sI8g8EAPz8/h/lKpRI+Pj4OMfXl+P0y7tSGYcOGISAgAHv27IGHh4fD/MTERMTExCA4OBibNm2CRqPB5s2b73JNGWOMsabBJtHU1LWKS6xHjRqFRx99FNu3b4dCUXfoU6fTCX8rlUqEhoYiPz+/3lwmkwkmk+MQsM1ug5x4c0fGGGOssbWWOjGN/j+vVqsFABQVFTk8X1RUJMzTarUoLi52mG+xWFBSUuIQU1+O3y/jdqKiopCdnY0LFy78+RX5f1JSUqDRaBymovL/a3BexhhjjN2dRu/EdO3aFVqtFgcOHBCeMxqNOH78uDACotPpUFpaitzc305eO3jwIGw2GwYMGCDEZGdnw2z+7WQzvV6P7t27o23btndsQ2pqKqZNm4aRI0fW25E5duyY8LfFYkFubi6Cg4PrzZWcnIyysjKHyd+rA2FLMMYYY87Bh5OIysvL8cMPPwiPf/rpJ+Tl5cHHxwedOnWCTCbD3Llz8fe//x0PPPAAunbtisWLFyMwMBDjx48HAAQHB2P06NGYMWMG0tPTYTabkZiYiNjYWAQGBgIAnn76aSxfvhzx8fFYuHAhzp07h3Xr1iEtLY3UztWrV8NqtWLEiBHIyspCjx49hHkbN27EAw88gODgYKSlpeHGjRuYPn16vXlUKhVUKpXDc3woiTHGWFPSWg4nNbgTc+rUKQwfPlx4PH/+fADAtGnTsGXLFgDAyy+/jIqKCsycOROlpaUICwtDZmYm3NzchNdlZGQgMTERI0eOhFwuR0xMDNavXy/M12g02L9/PxISEhASEoJ27dphyZIl5MurASAtLc2hI1N7pVNqaipSU1ORl5eHoKAg7N69G+3atWvIZmGMMcZYI5PZqcVL2G09HBBGimvKdWIoPIh1Ytxk4nUjAEBGqI2iIo5yKYnbQ0WIo9aJ8QJtPVWE3e5BrC/hTSxq4UpYpqeN9u7wk9FqmbTzrhCNodaJqShXiQcRKaSuE6Ok7QSlq/hy3dS0WizuXWnvNZeB4nViTAdpdWJuXHQVDwJgNLqJxtw00XKVglYfqJLwvVCuoH2mKomlXaoIXzHVxNGPJT9n0BbaANO6xEiS54PL/5YkT2NpFVcnNbYam4UUZ6UUdwNQaqkUjVHJaR92uYSF21yIHQoLsbOmJLTNTuwouElYY0pBXGY5tUwWoVNHWyJQQTxyKSO81czEYn3VNlrrakziXyeWGtp7o7qG9tVkI3T+5DLaMmXEOKWC9jlWVovHWWqInykTrbPjVX1ONEY1uDspl6b6IinO8oP4OlistPWsIhZWtBI6C1XEHwbEjwHkhLeHrQnVurO1kvEJPpmDMcYYY80Sj8QwxhhjLUzrGIfhTgxjjDHW4jSHWwZIgQ8nMcYYY6xZ4pEYxhhjrIVpLXViWvRITFZWFmQyGUpLS+udf/nyZchkMuTl5Tm1XYwxxlhjai0VexvcicnOzsa4ceMQGBgImUyGXbt2Ocw3m81Y
uHAh+vTpA09PTwQGBmLq1Km4evWqQ1xJSQni4uKgVqvh7e2N+Ph4lJeXO8ScOXMGQ4YMgZubGzp27IhVq1Y1tPmMMcZYi2ODXZKpqWtwJ6aiogJ9+/bFxo0b651fWVmJ06dPY/HixTh9+jQ+//xzFBQUIDo62iEuLi4O58+fh16vx969e5Gdne1QjddoNCIiIgKdO3dGbm4u/vGPf2DZsmV4++23G7oKjDHGGGuGGnxOzJgxYzBmzJjbztdoNNDr9Q7PvfXWW3j00UdRWFiITp06IT8/H5mZmTh58iRCQ0MBABs2bMDYsWOxevVqBAYGIiMjAzU1NXjvvffg6uqKXr16IS8vD2vXriXfeqCyshIxMTEwGo3Yt2+f8PzFixcxe/ZsnD59GkFBQdi4cSOGDRtG3gbUY48KYrE4Su/XbKcVWrMRC+x5KcQrpFbbaUX9qAX27IQicJSqvgBgI/5gcKHsK2LBKmrFXhNhUJZacdhErMylJKRTEouBVRPbRilQp6BUDANQZaUWuxOPkRP3p4z6ObZSP+/icTVm2nvITI2rFi+Kp66kFbFzD+tGimtb/aNojOVHYsFEK3E9beIVgF2I3wnuxDgT4X1ELVrpDHxOTCMqKyuDTCaDt7c3ACAnJwfe3t5CBwYAwsPDIZfLcfz4cSFm6NChwv2OACAyMhIFBQW4ceOG6DJLS0sxatQo2Gw26PV6YdkAkJSUhAULFuDbb7+FTqfDuHHj8Ouvv0qzsowxxpiT8TkxjaS6uhoLFy7E5MmToVarAQAGgwF+fn4OcUqlEj4+PjAYDEKMv7+/Q0zt49qY2zEYDBg2bBgCAgKwZ88eeHh4OMxPTExETEwMgoODsWnTJmg0GmzevLlB68kYY4yxxuXUS6zNZjMmTZoEu92OTZs2OW25o0aNwqOPPort27dDoag74KfT6YS/lUolQkNDkZ+fX28uk8kEk8nxhng2uw1y4qEixhhjrLG1lns7O+1/3toOzM8//wy9Xi+MwgCAVqtFcXGxQ7zFYkFJSQm0Wq0QU1RU5BBT+7g25naioqKQnZ2NCxcuNHg9UlJSoNFoHKb/VlwVfyFjjDHmJE356qTLly8jPj4eXbt2hbu7O+6//34sXboUNTU1d53LKZ2Y2g7M999/j6+++gq+vr4O83U6HUpLS5Gbmys8d/DgQdhsNgwYMECIyc7Ohtn824lrer0e3bt3R9u2be+4/NTUVEybNg0jR46styNz7Ngx4W+LxYLc3FwEBwfXmys5ORllZWUOUzvPQPGNwBhjjDFcvHgRNpsN//rXv3D+/HmkpaUhPT0dr7766l3navDhpPLycvzwww/C459++gl5eXnw8fFBp06dYDab8dRTT+H06dPYu3cvrFarcA6Lj48PXF1dERwcjNGjR2PGjBlIT0+H2WxGYmIiYmNjERh4q4Pw9NNPY/ny5YiPj8fChQtx7tw5rFu3DmlpaaR2rl69GlarFSNGjEBWVhZ69OghzNu4cSMeeOABBAcHIy0tDTdu3MD06dPrzaNSqaBSOV7Jw4eSGGOMNSVN+aTc0aNHY/To0cLjbt26oaCgAJs2bcLq1avvKleDOzGnTp3C8OHDhcfz588HAEybNg1btmzBlStXsHv3bgBAv379HF779ddf47HHHgMAZGRkIDExESNHjoRcLkdMTAzWr18vxGo0Guzfvx8JCQkICQlBu3btsGTJEvLl1QCQlpbm0JGpvdIpNTUVqampyMvLQ1BQEHbv3o127dr9mc3BGGOM3XNSXWJd33mg9f2Yb6iysjL4+Pjc9etk9tZy9k8j6uU/gBRHrXniKhfvWyoJNVYAaevEUJdJrRPjSshHiQEAF+KRUUqcO3GZ1DoxroTt4Wmntd+TWNvFk/Cp9qKVGoKPlVYfyM+lWjSGWifmZo14HRDgHtWJIdR/oca5KGg7wd1NvP4LALh5EOrEdKKdd0CtE1PxlXidmOs/epFylVa6keJuEOrElNZzEUd9qokD
6aWEuErie2Pxzxm0hTbA452iJMkTOv0RLF++3OG5pUuXYtmyZZLkB4AffvgBISEhWL16NWbMmHFXr+UbQEqgxkb7kqeyEjoeLnLif6Ay2i6uJqyDK/ED6kLsBFgJ/7mbqZXniGGU7ysrtbgbecBWPJ8LddtSO4iEdBXEL28vGy3QQoizEjdZtZ3YSSfEKIi/0yjvx7vJpyB0ilyI29ZMLQJHKIpn/Q+x6CahiB0AeAzrKBrja/4/Uq6an2jraTGJr4ON+F6zU3+MEN4eZmIxSmeQ6qTc5ORk4QhLrduNwrzyyit444037pgvPz/f4XSOK1euYPTo0Zg4ceJdd2AA7sQwxhhjLY5UB1nu5tDRggUL8Oyzz94xplu330b4rl69iuHDh2PQoEF/+hZC3IlhjDHGWph7cWJv+/bt0b59e1LslStXMHz4cISEhOD999+HXP7nLpDhTgxjjDHGnObKlSt47LHH0LlzZ6xevRrXr18X5onVffsj7sQwxhhjLUxTvgGkXq/HDz/8gB9++AEdOnRwmHe3h8G4wAljjDHWwjTlir3PPvss7HZ7vdPdatGdmKysLMhkMpSWltY7//Lly5DJZMjLy3NquxhjjDHWcA3uxGRnZ2PcuHEIDAyETCbDrl277hj/wgsvQCaT4c0333R4vqSkBHFxcVCr1fD29kZ8fDzKy8sdYs6cOYMhQ4bAzc0NHTt2xKpVqxrafMYYY6zFud1Ix91OTV2DOzEVFRXo27cvNm7cKBq7c+dOHDt2TLiVwO/FxcXh/Pnz0Ov12Lt3L7Kzsx2q8RqNRkRERKBz587Izc3FP/7xDyxbtuxPX5bFGGOMtVRN+XCSlBp8Yu+YMWMwZswY0bgrV65gzpw5+PLLLxEV5VhJMD8/H5mZmTh58iRCQ0MBABs2bMDYsWOxevVqBAYGIiMjAzU1NXjvvffg6uqKXr16IS8vD2vXriXfeqCyshIxMTEwGo3Yt2+f8PzFixcxe/ZsnD59GkFBQdi4cSOGDRtG3gaU4nRSUxArvJZZK0lx7grxCphWuQspl5Rveys5G+2tTKneqiTuTzu1ciuhiFo1cT3l5IKD4suklfgCLMQCXtXEgmwUVcT7kRGLDpNQf9FRithRUT/HbsQqgTWE8rM1Ftp+qjHRPlM+JvFCdm2G+ZFy+dVcFw8CYPuF8Jmqoa2nijja4EGolm1p+v/ntzhOOSfGZrNhypQpSEpKQq9everMz8nJgbe3t9CBAYDw8HDI5XIcP35ciBk6dKhwvyMAiIyMREFBAW7cuCHahtLSUowaNQo2mw16vR7e3t7CvKSkJCxYsADffvstdDodxo0bh19//bUBa8wYY4zdO3aJ/jV1TunEvPHGG1AqlXjxxRfrnW8wGODn59hTVyqV8PHxEe54bTAY4O/v7xBT+7g25nYMBgOGDRuGgIAA7NmzBx4eHg7zExMTERMTg+DgYGzatAkajQabN2++q3VkjDHGmgqb3S7J1NQ1ep2Y3NxcrFu3DqdPn4bsHt1XYtSoUXj00Uexfft2KOq5KZhOpxP+ViqVCA0NRX5+fr256rujp91ug4w4/M0YY4wxaTT6/7yHDx9GcXExOnXqBKVSCaVSiZ9//hkLFixAly5dANyq0FdcXOzwOovFgpKSEqF6n1arRVFRkUNM7WOxCn9RUVHIzs7GhQsXGrw+KSkp0Gg0DlNpVZH4CxljjDEnsUs0NXWN3omZMmUKzpw5g7y8PGEKDAxEUlISvvzySwC3RkJKS0uRm5srvO7gwYOw2WwYMGCAEJOdnQ2z+bdbzev1enTv3h1t27a9YxtSU1Mxbdo0jBw5st6OzLFjx4S/LRYLcnNzERwcXG+u5ORklJWVOUze7v71xjLGGGP3Al+dRFReXo4ffvhBePzTTz8hLy8PPj4+6NSpE3x9feHr6+vwGhcXF2i1WnTv3h0AEBwcjNGjR2PGjBlIT0+H2WxGYmIiYmNjhcuxn376aSxfvhzx8fFYuHAh
zp07h3Xr1iEtLY3UztWrV8NqtWLEiBHIyspyuBX4xo0b8cADDyA4OBhpaWm4ceMGpk+fXm+e+u7oyYeSGGOMNSXNoQMihQZ3Yk6dOoXhw4cLj+fPnw8AmDZtGrZs2ULOk5GRgcTERIwcORJyuRwxMTFYv369MF+j0WD//v1ISEhASEgI2rVrhyVLlpAvrwaAtLQ0h45M7ZVOqampSE1NRV5eHoKCgrB79260a9eOnJcxxhhjziezN4eSfE1ct3b9Jc2nIIzsqIg1W0w2s3gQaHViqMt0k9HiXGXidRwo2wIAVDJaf1xFyOdO7NsriSeqexLyuRGP7HoQ49Q28bZ5ED/5fsTiF+1sNbSEBGXE91DzrxNDy+UGWp0YF5l4nLvSQsrloaJ9d/jcVyEaQ60TU3aAVifG8ItaNOa/Ne60Zcpp9WSMCvHPlJH4JppX+DEtsAEGBj4mSZ5jV7MkydNY+C7WEvi12kiK83JxI8VRLmuzKWlffq5y2i6usYl/sckIRdsAQCGnfZJNdvFluhE7J9T/geyEEm+UzhUAULv/VkKBOrPEQ78mwjJVhOJdAFBOqRAIwMMmvt2oHYVqYgeRUojPSrwoUk7cBdRrLJWENwjh/0UAQA2xKJ4LYZnVZtr722ShffYsheJtsxE7J+ohdz6/sZb1YJloTM0V2nqarSrxIAAmwveCy725ALdereVwEp/MwRhjjLFmiUdiGGOMsRamOVTblQJ3YhhjjLEWprWc7sqHkxhjjDHWLPFIDGOMMdbCtJYTe7kTwxhjjLUwfDipBbh8+TJkMhny8vJuGyOTybBr1y6ntYkxxhhj0mhwJyY7Oxvjxo1DYGDgHTsE+fn5iI6OhkajgaenJx555BEUFhYK86urq5GQkABfX194eXkhJiamzg0fCwsLERUVBQ8PD/j5+SEpKQkWC61wE2OMMdZa8L2TiCoqKtC3b19Mnz4dEyZMqDfmP//5D8LCwhAfH4/ly5dDrVbj/PnzcHP7rfjbvHnzsG/fPuzYsQMajQaJiYmYMGECjhw5AgCwWq2IioqCVqvF0aNHce3aNUydOhUuLi54/fXXG7oaDUItYkcpKAfQqtTKicXArHZapU/KMqustIqs1Cq7ckLZMDmx0Bp1PUEoWFVNrANL/QXgQlhPC/HLgpILAGyE94eNuAIexKJ4Zsr7lrieJuJ+p+x1arE7QpFjAPT9LidsNxdqgT3i551SYM+NuD+pBfas1YT39y/EXIQidgDQdoin+DIP3CTlMhfR2mYhvEEonwFn4UusicaMGYMxY8bcMeZvf/sbxo4di1WrVgnP3X///cLfZWVl2Lx5M7Zu3YoRI0YAAN5//30EBwfj2LFjGDhwIPbv348LFy7gq6++gr+/P/r164eVK1di4cKFWLZsmXAfpDuxWq2YMWMGjh49iv3796NTp04AgGvXrmHMmDHIyspCQEAAVq1ahaeeeurPbA7GGGPsnqNUfm8JGr3baLPZsG/fPjz44IOIjIyEn58fBgwY4HDYKTc3F2azGeHh4cJzPXr0QKdOnZCTkwMAyMnJQZ8+feDv7y/EREZGwmg04vz586LtMJlMmDhxIvLy8nD48GGhAwMAixcvRkxMDL777jvExcUhNjYW+fn5Eqw9Y4wxxhpLo3diiouLUV5ejtTUVIwePRr79+/Hk08+iQkTJuDQoUMAAIPBAFdXV3h7ezu81t/fHwaDQYj5fQemdn7tvDspLy9HVFQUrl+/jq+//hrt27d3mD9x4kQ8//zzePDBB7Fy5UqEhoZiw4YN9eYymUwwGo0Ok516KIMxxhhzArtE/5q6Rr/E2ma79R/8E088gXnz5gEA+vXrh6NHjyI9PR3Dhg1r7CZg8uTJ6NChAw4ePAh397p3NtXpdHUe3+6KppSUFCxfvtzhOS9VO6jdaXdpZYwxxhobH06SSLt27aBUKtGzZ0+H54ODg4Wrk7RaLWpqalBaWuoQU1RUBK1WK8T88Wql2se1Mbcz
duxYnDlzRjg01RDJyckoKytzmNq4tWtwXsYYY4zdnUbvxLi6uuKRRx5BQUGBw/OXLl1C586dAQAhISFwcXHBgQMHhPkFBQUoLCwURkl0Oh3Onj2L4uJiIUav10OtVtfpIP3RrFmzkJqaiujoaOEQ1u8dO3aszuPg4OB6c6lUKqjVaodJ1oTOSGeMMcb4cBJReXk5fvjhB+HxTz/9hLy8PPj4+AgnzyYlJeEvf/kLhg4diuHDhyMzMxN79uxBVlYWAECj0SA+Ph7z58+Hj48P1Go15syZA51Oh4EDBwIAIiIi0LNnT0yZMgWrVq2CwWDAokWLkJCQAJVKJdrOOXPmwGq14vHHH8cXX3yBsLAwYd6OHTsQGhqKsLAwZGRk4MSJE9i8eXNDNw1jjDF2T7SWw0kN7sScOnUKw4cPFx7Pnz8fADBt2jRs2bIFAPDkk08iPT0dKSkpePHFF9G9e3f8+9//duhIpKWlQS6XIyYmBiaTCZGRkfjnP/8pzFcoFNi7dy9mzZoFnU4HT09PTJs2DStWrCC3de7cubDZbBg7diwyMzMxaNAgAMDy5cuxbds2zJ49GwEBAfjkk09ER3cYY4wxdm/J7K3lBguNSO3ZjRTn4SI+YgQAHkrx4nnUK6KqrWZSnLerl2gMtYidm9yFFCcnFUejFeZypy6TkM+VUBAPADyJy3SFeD434jK9CLkAwJMQpyIWPWtHrALnbxH/KlEQv25+VRKLoxGaRr12kBpHPXhMyUf9FSkjfktT2qYi5nIh7isPm3ich51WQNJTTisIGuBnFI1pN5i2dYuyaHu+8FeNaMyvxO+EidcySHEN8UD7EEnyfH89V5I8jYVvAMkYY4y1MK3lcBKfkcoYY4yxZolHYhhjjLEWpjlcWSQF7sQwxhhjLUxrqSTPnRjGGGOshbG1kpEYPieGMcYYY81Si+7EXL58GTKZ7Lb3QQIAmUzmcEdtxhhjrLmz2+2STE1dgzsx2dnZGDduHAIDA2/bISgvL0diYiI6dOgAd3d39OzZE+np6Q4x1dXVSEhIgK+vL7y8vBATE1PnXkmFhYWIioqCh4cH/Pz8kJSUBIuFVleAMcYYay1ssEsyNXUNPiemoqICffv2xfTp0zFhwoR6Y+bPn4+DBw/i448/RpcuXbB//37Mnj0bgYGBiI6OBgDMmzcP+/btw44dO6DRaJCYmIgJEybgyJEjAACr1YqoqChotVocPXoU165dw9SpU+Hi4oLXX3+9oavRIGYbrZCTyUIrPEeJUyqIxdEIhfMAoNpqEo2REwuyVVlrSHEaFw/RGFcZ7S1abad1Zl0kHHyUEYvFyWSEOGoBMkouAK6U9aSlgpW4TBMljpyLFEaOo6C+M8zEZboQ9qn4p+4WJXGZcsIyqad7kvYnADMhzkr7ikSNjVjksEi88Jw16yYpl98QUhgsX5eLxtSUqWnJmGQa/I0+ZswY/P3vf8eTTz5525ijR49i2rRpeOyxx9ClSxfMnDkTffv2xYkTJwAAZWVl2Lx5M9auXYsRI0YgJCQE77//Po4ePSrcnHH//v24cOECPv74Y/Tr1w9jxozBypUrsXHjRtTU0P7TtFqtmD59Onr06CHcQRsArl27hjFjxsDd3R3dunXDZ5991oAtwhhjjN1bfDhJQoMGDcLu3btx5coV2O12fP3117h06RIiIiIAALm5uTCbzQgPDxde06NHD3Tq1Ak5OTkAgJycHPTp0wf+/v5CTGRkJIxGI86fPy/aBpPJhIkTJyIvLw+HDx8Wbk4JAIsXL0ZMTAy+++47xMXFITY2Fvn5+VKtPmOMMeZUNrtdkqmpc8ol1hs2bMDMmTPRoUMHKJVKyOVyvPPOOxg6dCgAwGAwwNXVFd7e3g6v8/f3h8FgEGJ+34GpnV87707Ky8sRFRUFk8mEr7/+GhqN41DkxIkT8fzzzwMAVq5cCb1ejw0bNjjcgLKWyWSCyeQ4CGy322mHDBhjjDEmGaeMxGzYsAHHjh3D
7t27kZubizVr1iAhIQFfffWVMxaPyZMno6KiAvv376/TgQEAnU5X5/HtRmJSUlKg0WgcJoulrFHazRhjjP0Zdon+NXWN3ompqqrCq6++irVr12LcuHF46KGHkJiYiL/85S9YvXo1AECr1aKmpgalpaUOry0qKoJWqxVi/ni1Uu3j2pjbGTt2LM6cOSMcmmqI5ORklJWVOUxKpfhJZowxxpiz8DkxEjGbzTCbzZDLHRelUChgs906Tz4kJAQuLi44cOCAML+goACFhYXCKIlOp8PZs2dRXFwsxOj1eqjVavTs2fOObZg1axZSU1MRHR2NQ4cO1Zlfe/Lw7x8HBwfXm0ulUkGtVjtMfCiJMcYYc74GnxNTXl6OH374QXj8008/IS8vDz4+PujUqRPUajWGDRuGpKQkuLu7o3Pnzjh06BA+/PBDrF27FgCg0WgQHx+P+fPnw8fHB2q1GnPmzIFOp8PAgQMBABEREejZsyemTJmCVatWwWAwYNGiRUhISIBKpRJt55w5c2C1WvH444/jiy++QFhYmDBvx44dCA0NRVhYGDIyMnDixAls3ry5oZuGMcYYuyeaQ40XKTS4E3Pq1CkMHz5ceDx//nwAwLRp07BlyxYAwLZt25CcnIy4uDiUlJSgc+fOeO211/DCCy8Ir0tLS4NcLkdMTAxMJhMiIyMdTqxVKBTYu3cvZs2aBZ1OB09PT0ybNg0rVqwgt3Xu3Lmw2WwYO3YsMjMzMWjQIADA8uXLsW3bNsyePRsBAQH45JNPREd3GGOMsaaqORwKkoLM3lrWtBEpXe8jxXm7eZLiFDLxo3wqhQspF7UQn5xwSKwNoTgdAFQRCucBgJfSXTRGJaetJ7UonjshnxsxF5VaLj5SSD2u6yWjbQ8VIWMb0IoX+hILkPkS3mrUA683aE1DpUz860tBXKpV4l+ucsJypT4QTdlTCuJqUt+TSkI+D+Iy3YiV+Lxs4oFtiAUwAzXiRewAIHCIeBHSwq/FP+sA0PvHvaS4hvBp84AkeUpufi9JnsbSou+dxBhjjLGWyyl1YhhjjDHmPK3lIAt3YhhjjLEWprWc2MuHkxhjjDF2T5hMJvTr1w8ymQx5eXl3/XruxDDGGGMtTHMpdvfyyy8jMDDwT7+eOzGMMcZYC9McbgD5xRdfYP/+/UL1/j+jRXditmzZUuemkr+XlZUFmUxW53YHjDHGGLt1uMdoNDpMf7wJ8p9RVFSEGTNm4KOPPoKHB618R30a3IlJSUnBI488gjZt2sDPzw/jx49HQUGBQ0x1dTUSEhLg6+sLLy8vxMTE1LkPUmFhIaKiouDh4QE/Pz8kJSXBYnG8zj8rKwsPP/wwVCoVgoKChGJ6jDHGGPuNVDeArO+mxykpKQ1rm92OZ599Fi+88AJCQ0MblKvBVycdOnQICQkJeOSRR2CxWPDqq68iIiICFy5cgKfnreJu8+bNw759+7Bjxw5oNBokJiZiwoQJOHLkCADAarUiKioKWq0WR48exbVr1zB16lS4uLjg9ddfB3DrdgZRUVF44YUXkJGRgQMHDuD5559HQEAAIiMjG7oaDdLGVbxoGwBUW8SLJQGAp4t4waQaG62QE/WYplwmXl2MukyV3JUUJyOU+rLYacX6qG2jFBK02mkVtzyJ61lhF9/vroTtDwCuoLXNhbBtTcRcFsI2AwATIYxQmw4AUE2sAkdZA2oROwt5mdQhdumG4imF86ho5RIBJXGZlFvH2YibwkxcTTPhPWm20dbUUqamxX0tXhSvg66SlMsZpDoUlJycLFTir3W7W/288soreOONN+6YLz8/H/v378fNmzeRnJzc4PZJXrH3+vXr8PPzw6FDhzB06FCUlZWhffv22Lp1K5566ikAwMWLFxEcHIycnBwMHDgQX3zxBR5//HFcvXoV/v7+AID09HQsXLgQ169fh6urKxYuXIh9+/bh3LlzwrJiY2NRWlqKzMzMetuyZcsWzJ07VzhcdP36dYwZMwYdO3bE
tm3bkJOTg+HDh2Pv3r1ITk7GpUuX0K9fP7z77rvo3bs3eZ3begWR4qjVcymdGOpNJ6m7VykX/09UpaD9p62k/ocsF+9DUyoJA/QPrEYpPmxJrfBK7cRQOk7UTkwbYsVeT0I1XjfiQKyfnfZbx5vQo6B2YkqIFXsthI4C9QtO+k6MdCTtxBCbT+7EEGLcictUSVjZ14vYc1ITv5cDPaXrxHhnHCTFNYS7e2dJ8lRV/UyOvX79On799dc7xnTr1g2TJk3Cnj17HP4fs1qtUCgUiIuLwwcffEBepuTnxJSVlQEAfHx8AAC5ubkwm80IDw8XYnr06IFOnTohJycHAJCTk4M+ffoIHRgAiIyMhNFoxPnz54WY3+eojanNIeaXX37BkCFD0Lt3b3z22WcOPcmkpCSsWbMGJ0+eRPv27TFu3DiYzbRRE8YYY6ypuRdXJ7Vv3x49evS44+Tq6or169fju+++Q15eHvLy8vC///u/AIDt27fjtddeu6tlSlrszmazYe7cuRg8eLAwkmEwGODq6lrnBFt/f38YDAYh5vcdmNr5tfPuFGM0GlFVVQV399sf0ikoKMCoUaPw5JNP4s0336wzirF06VKMGjUKAPDBBx+gQ4cO2LlzJyZNmlQnl8lkqnNSk91uJ4+MMMYYY43N3oSL3XXq1MnhsZeXFwDg/vvvR4cOHe4ql6QjMQkJCTh37hy2bdsmZdoGqaqqwpAhQzBhwgSsW7eu3s6GTqcT/vbx8UH37t2Rn59fb776TnKqNt9otPYzxhhjd6u51IlpKMk6MYmJidi7dy++/vprh56UVqtFTU1NncuYi4qKoNVqhZg/Xq1U+1gsRq1W33EURqVSITw8HHv37sWVK1f+9PrVSk5ORllZmcPk5tK2wXkZY4yx1qhLly6w2+3o16/fXb+2wZ0Yu92OxMRE7Ny5EwcPHkTXrl0d5oeEhMDFxQUHDhwQnisoKEBhYaEwAqLT6XD27FkUFxcLMXq9Hmq1Gj179hRifp+jNub3oyj1kcvl+OijjxASEoLhw4fj6tWrdWKOHTsm/H3jxg1cunQJwcHB9eZTqVRQq9UOEx9KYowx1pTwSAxRQkICPv74Y2zduhVt2rSBwWCAwWBAVVUVAECj0SA+Ph7z58/H119/jdzcXDz33HPQ6XQYOHAgACAiIgI9e/bElClT8N133+HLL7/EokWLkJCQIJyA+8ILL+DHH3/Eyy+/jIsXL+Kf//wnPv30U8ybN0+0jQqFAhkZGejbty9GjBghnGdTa8WKFThw4ADOnTuHZ599Fu3atcP48eMbumkYY4yxe8Iu0dTUNbgTs2nTJpSVleGxxx5DQECAMG3fvl2ISUtLw+OPP46YmBgMHToUWq0Wn3/+uTBfoVBg7969UCgU0Ol0eOaZZzB16lSsWLFCiOnatSv27dsHvV6Pvn37Ys2aNXj33XfJNWKUSiU++eQT9OrVCyNGjHAY9UlNTcVLL72EkJAQGAwG7NmzB66utMtnGWOMMXaP2Jnkqqur7UuXLrVXV1c3qVxS52uquaTO11raxut57/M11VxS52uquaTOJ3XbWF2SF7tjgNFohEajQVlZGdRqWjVIZ+Rqym1rLevZlNvG63nv8zXVXE25ba1lPVn9WvQNIBljjDHWcnEnhjHGGGPNEndiGGOMMdYscSemEahUKixduvS2d/q8V7mkztdUc0mdr7W0jdfz3udrqrmkztdUc0mdT+q2sbr4xF7GGGOMNUs8EsMYY4yxZok7MYwxxhhrlrgTwxhjjLFmiTsxjDHGGGuWlPe6AS3JqVOnkJ+fDwAIDg5GaGjon8pz48YNbN682SHX9OnT4ePjc8/zWa1W7Ny50yHX+PHjoVTe/VtJylxS5ysoKMCGDRsccs2ZMwfdu3f/U22TMp+U+1Pq9xoAFBcXo6CgAADQvXt3+Pn5/ak8Um4zqfdnU9WUvzukzNWUP59Sf68xEff2rgctwy+//GIPCwuzy2Qye9u2be1t
27a1y2Qy++DBg+2//PLLXeU6dOiQXaPR2Dt27Gh/8skn7U8++aS9U6dOdrVabT906NBdt03KfOfOnbN369bN7uHhYe/fv7+9f//+dk9PT3uXLl3sZ8+evWe5pM732Wef2ZVKpX3gwIH2efPm2efNm2fX6XR2pVJp/+yzz+66bVLmk3J/Sv1eMxqN9meeecauVCrtMpnMLpPJ7Eql0h4XF2cvLS29q1xSbjOp96fdbrd/9dVX9qioKHu3bt3s3bp1s0dFRdn1ev09zdWUvzukzNWUP59Sf68xcdyJkUBkZKR9wIAB9osXLwrPXbx40a7T6eyRkZF3lat37972GTNm2C0Wi/CcxWKxz5w50967d++7bpuU+QYOHGgfN26cvaSkRHiupKTEHh0dbdfpdPcsl9T5unXrZl+8eHGd55csWWLv1q3bXbdNynxS7k+p32uTJk2yP/DAA/bMzEx7WVmZvayszJ6ZmWnv3r27/S9/+ctd5ZJym0m9Pzdu3GhXKpX22NhY+7p16+zr1q2zT5482e7i4mJ/66237lmupvzdIWWupvz5lPp7jYnjTowE3Nzc7KdPn67z/KlTp+zu7u53nev3naFaFy9etLu5uf2ptkmVz83NzX7u3Lk6z589e/ae5pI6n7u7u/3777+v8/ylS5fuen9KnU/q/Snle83Dw8N++PDhOs9nZ2fbPTw87iqXlNtM6v1533332Tds2FDn+bfeesseGBh4z3I19e8OqXI19c+nlN9rTByf2CuBjh07wmw213nearUiMDDwrnI9/PDDwrHU38vPz0ffvn3vum1S5nvwwQdRVFRU5/ni4mIEBQXds1xS53vsscdw+PDhOs9/8803GDJkyF23Tcp8Uu5Pqd9rvr6+0Gg0dZ7XaDRo27btXeWScptJvT9LS0sxevToOs9HRESgrKzsnuVqyt8dUuZqyp9Pqb/XmDg+00gC//jHPzBnzhxs3LhROJn31KlTeOmll7B69WrR1585c0b4+8UXX8RLL72EH374AQMHDgQAHDt2DBs3bkRqaiqpPVLmMxqNwt8pKSl48cUXsWzZModcK1aswBtvvOHUXFLn2717t/B3dHQ0Fi5ciNzcXIdcO3bswPLly0ltkzKflPtT6vfa7y1atAjz58/HRx99BK1WCwAwGAxISkrC4sWLRV8v5TaTen/+XnR0NHbu3ImkpCSH5//nf/4Hjz/+uFNzNeXvDilzNeXPp9Tfa+zu8G0H/qS2bdtCJpMJjysqKmCxWIQz0Gv/9vT0RElJyR1zyeVyyGQyiO0KmUwGq9Uq2jYp89XmqlWbs/a53z92Zq7GaBvF3bRNqnyNsT+leq/179/fYR98//33MJlM6NSpEwCgsLAQKpUKDzzwAE6fPi3aNop7sT/Xr18v/G00GrF69WoMHjwYOp0OwK3/qI4cOYIFCxZg0aJFTsvVHL47pMpFcS8/n7Ua+r3G7g53Yv6kDz74gBw7bdq0O87/+eefybk6d+4sGiNlvkOHDpFzDRs2zGm5GiNfUyXl/pT6vXY3IxlLly4lxzY1Xbt2JcXJZDL8+OOPTsvVlL87pG5bU9VavoeaLCefg9PqVFZW3lV8WVnZbefVd/KZM/PV1NTcdt71/7+9Mw+K6sz68Olm37QFRHBhixhAQQgKiAqYKIsbItEQF6yK+5KMZoxLjUtVdBxcKtFRI4mjTBmXqCMa9yWKZVwiigEslrihCIoQI4oiDNi/7w+L/mxp9DZ9gMvkfaq6Kn3f8Hh4l9Mvt+89t7S02VzcvjfdFn/hwgW9XNw+zvHknmtvQq1W6/X/c/YZ93jKFTnnDk6XnNcnd14TvB2xiWHg008/1Xn86dOnCA8P18vVp08fPH/+vM7xvLw8dOjQQe/YOH3Dhw/X+WFUXFyMrl27NpuL2+fl5YWHDx/WOX727Fm0bt1a79g4fZzjyT3XVqxYofN4TU0N4uPj9XJx9hn3eL6p3sfevXubzSXn3MHpkvP65M5rgrcj
7k5i4NChQ3VOlT979oyioqKopqZGL5e1tTUNHz5c6+dyc3MpPDyc4uLi9I6N01dQUEATJkzQOnb//n0KDw8nT0/PZnNx+4KDgykiIoLKy8s1x86cOUMDBw5s0FcinD7O8eSeaytXrqRNmzZpHXvx4gXFx8dTRkaGXi7OPuMez8jISMrPz69zfM+ePTR69Ohmc8k5d3C65Lw+ufOaQALNvYv6X+DGjRtwcnLC119/DeBl5dJevXqhb9++ePr0qV6uiooKhISEYOTIkVCr1bh69SocHBwwa9asBsXG6SspKYGnp6fmZ4uKitClSxeMGDECL168aDYXt+/FixeIjY1FWFgYKisrcerUKVhbW2P16tV6x8Xt4xxP7rmWlpYGlUqF3bt3AwCqq6sRGxsLLy8v3L9/Xy8XZ59xj2dtEbRXf6cffvgBlpaW2LVrV7O55Jw7OF1yXp/ceU3wdsQmhonMzEzY2tpizZo1CA4ORlhYmN4bmFoePXqE7t2748MPP4SDgwNmz55tUGycvoKCAjg7O2PWrFnw8PDARx99pFWFs7lc3L6qqir0798fISEhsLa21lmQrLl8nOPJPddOnjwJGxsb/Pjjjxg6dCi8vb1RXFzcIBdnn3GP54wZM9C1a1c8fPgQ27Ztg4WFRYMfYcDpknPu4HTJeX1y5zXBmxF3JzFy4cIFGjBgAAUFBdHBgwfJwsJC0s+9Wmeglvv379OAAQNo8ODBWnUUWrVq1eS+17l27Rr17duXBgwYQN9//73W7YXN6TLE92pNi1rKy8vp448/pkGDBtHUqVM1x319fZvUxzmejT03iIj27dtHI0aMIC8vLzp16hTZ29tL+jnOPuMeT12MHj2aLl26REVFRbR9+3aKiYlpkMcQl5xzB6dLzutTF9x5TVA/YhPTQF6vj1HLnTt3yMHBQWsDI6U+hi4XXqkvAKDB9VMM8b1eD6eWiooKMjMzIyMjI82xt9XD4XRx+3TVtHj1fUPHgMPHOZ7cc2348OE6j//yyy/UuXNnrQ1MSkqKpNg4+4xrPF8tjlZLdXU1zZo1iyIiImjo0KGa46/+d2O75Jw7GsMlx/XJndcE+iEq9jaQYcOGsblSU1PZXNy+1atXy9LF7dN1caVcfJzjyT3XdD1mgOjlBav6wtln3OP5pvW+efNm2rx5MxFJK2jG6ZJz7uB0yXl9cuc1gX6IMzECgUAgEAhaJOIWa4FAIBAIBC0SsYkRCAQCgUDQIhGbGIFAIBAIBC0SsYkRCAQCgUDQIhF3J8mQx48fU3FxMREROTo61nsHSHP5qqqqiIjIzMzMIA+3i8tXU1ND2dnZWn3m7e1NJiYmsvBxjif33OCCs8+4+1/OyDl3cLnkvj6J+POa4A00ZiW9PwPZ2dmYOnUq/Pz84OjoCEdHR/j5+WHq1KnIzs7Wy7Vx40Z4eXlBqVRqvby8vPCvf/1L79g4fcePH0d0dDRUKpXGo1KpEB0djRMnTjSbi9P34sUL/O1vf4NKpYJCodB6qVQqLFiwQK/S4dw+zvHknmulpaVYvnw5hg0bhuDgYAQHB2PYsGFYsWIFSkpKJHs4+4y7/2u5f/8+9u3bh6SkJCQlJWHfvn16P1aB2yXn3MHlkvv65M5rAmmITYwBHD58GKampggODsbixYvxzTff4JtvvsHixYsREhICMzMzHD16VJJrxYoVsLS0xLx585CamoqcnBzk5OQgNTUV8+fPh5WVFVauXCk5Nk7fv//9bxgbGyM+Ph7Jyck4fPgwDh8+jOTkZHz88ccwMTHBli1bmtzF7fviiy/Qtm1bJCUlIT8/HxUVFaioqEB+fj6+/fZbODg4YM6cOZJj4/Rxjif3XEtLS0ObNm3QoUMHjBs3DnPmzMGcOXMwbtw4dOzYEba2trh06ZIkF2efcY/n06dPMXr0aBgZGcHY2BgODg5wcHCAsbExjIyMMGbMGDx79qzJXXLOHZwuOa9P7rwmkI7YxBiAr68vFi5cWG/74sWL
4ePjI8nl7OyMnTt31tv+ww8/oFOnTpJj4/R5eHhg3bp19bavX78enTt3bnIXt69du3Zv3HQePXoUDg4OkmPj9HGOJ/dcCwoKwqRJk6BWq+u0qdVqTJo0CcHBwZJcnH3GPZ7jx4+Hh4cHjh49qvUsnJqaGhw7dgxdunTBhAkTmtwl59zB6ZLz+uTOawLpiE2MAZibmyMvL6/e9ry8PJibm0t25eTk1NuenZ0NCwsLvWLj8pmZmbH9npwubp+lpSWysrLqbc/MzISVlZXk2Dh9nOPZGHMtNze33vbc3NxmGQPu8VSpVDh37ly97WfPnoVKpWpyl5xzB6dLzuuTO68JpCPuTjIAV1dXOnToUL3thw4dIhcXF0munj17UmJiItXU1NRpe/HiBS1fvpx69uwpOTZOX9euXWnTpk31tm/evJm8vb2b3MXtCw8Pp9mzZ9Pvv/9ep+3333+nuXPnUnh4uOTYOH2c48k91xwdHSktLa3e9rS0NGrXrp0kF2efcY+nWq0mU1PTettNTU1JrVY3uUvOuYPTJef1yZ3XBNIRjx0wgN27d9OoUaMoOjqa+vfvr0nUDx48oJMnT9LRo0dp+/btFBcX91ZXVlYWRUZGUnV1NYWGhmq5zpw5Q6ampnT8+HHq1q2bpNg4fadPn6bBgweTu7u7zt/z1q1bdOjQIQoNDW1SF7fv7t27NHDgQMrLyyMfHx8t19WrV8nb25sOHjxInTp1khQbp49zPLnn2vr16+mvf/0rTZ48mT744IM6Y7Bx40ZatWoVTZs27a0uzj7jHs/Ro0dTbm4ubdq0ifz9/bXafv31V5o4cSJ5enrS1q1bm9Ql59zB6ZLz+uTOawLpiE2MgZw/f57++c9/0oULF7Ru0evVqxf95S9/oV69ekl2lZeX09atW+mXX36p4xo1atRbH1ffmL7bt2/Thg0bdLqmTJlCrq6uzeLi9qnVajp27JhOV0REBCmV+p285PRxjif3XNu5cyd9/fXXlJ6ernlooZGREQUEBNDnn39OI0eOlOzi7DNO16NHj2jUqFF07NgxatOmDTk4OBARUUlJCZWVlVFkZCRt376dVCpVk7qI5J07OF1yXp/ceU0gDbGJEQgEbFRXV2tOz9vb2/9P1mLJzc3V+UHl6enZrC6B4M+I2MQwwVXIqbi4mC5evKhxOTk5UWBgIDk6Oja77/WiUE5OTuTl5cVSgMwQF7cvLS2tzpm1kJAQva4raCwf53hyzzVOOPuMezzlipxzB6dLzuuTO68JJNB81xT/b/B6ISeFQtGgQk6cdSO4fXIuQMbpe/DgAfr06QOFQgEXFxcEBgYiMDAQLi4uUCgU6NOnDx48eCA5Nk6fXGuUcMPZZ9zjCQBVVVXYuXMnZs6cifj4eMTHx2PmzJnYtWsXqqqqmsUl59zB6ZLz+myswoqCtyM2MQbAWciJs24Et0/OBcg4fXFxcejVq5fOWyXz8vIQEhKCDz/8UHJsnD651ijhhrPPuMfz+vXrcHd3h7m5OcLCwjBy5EiMHDkSYWFhMDc3R+fOnXH9+vUmd8k5d3C65Lw+ufOaQDpiE2MAnIWcOOtGcPvkXICM02dtbY0rV67U23758mVYW1tLjo3TJ9caJdxw9hn3ePbv3x8xMTF4/PhxnbbHjx8jJiYGERERTe6Sc+7gdMl5fXLnNYF0xAMgDaCkpIR8fHzqbffx8dFZg0AXnHUjuH3l5eXUvn37etudnJzo2bNnTe7i9pmZmdGTJ0/e+G/p80A3Tp9ca5Rww9ln3ON57tw5SktL03k3TatWrWjJkiUUFBTU5C455w5Ol5zXJ3deE+hBc++iWjJ9+/ZFQkICqqur67TV1NQgISEBoaGhklyjRo2Cv7+/zr8Mrly5goCAAIwePVpybJy+gQMHIiIiAqWlpXXaSktLERUVhUGDBjW5i9s3bdo0uLi4ICUlResv5MePHyMlJQWurq6YMWOG5Ng4fZzjyT3XOOHsM+7xdHJywoEDB+pt
379/P5ycnJrcJefcwemS8/rkzmsC6YhNjAFkZmbC0dERdnZ2iI2NxZQpUzBlyhTExsbCzs4OTk5OuHr1qiTXH3/8gaioKCgUCtja2sLT0xOenp6wtbWFUqlEdHQ0Hj16JDk2Tl9BQQG6desGY2Nj+Pv7IyoqClFRUfD394exsTF8fX1RUFDQ5C5uX2VlJaZMmQJTU1MolUqYm5vD3NwcSqUSpqammDp1KiorKyXHxunjHE/uucYJZ59xj+fChQvRpk0bfPXVV8jMzERxcTGKi4uRmZmJr776Cra2tli8eHGTu+ScOzhdcl6f3HlNIB1xi7WBcBeZ4q4bkZeXp7MQn74+uRYgawzfkydPKD09XcsVEBCg91g2ho9rPInkXaOEs884XcuXL6c1a9ZQcXExKRQKIiICQI6OjjRz5kyaM2dOs7iI+MeT08c5b+W6PrnzkEAaYhMjEAj+VADQbBoaSn5+vtYHlZubmyxcAsGfDbGJYYCrkFNmZialp6dTeHg4ubu7U3Z2Nq1fv57UajXFxsZSZGSkZFdVVRUplUpNkaWbN2/S5s2bqaCggFxcXGj8+PEGJ8v333+fkpOTJT/k8lXUarXOv0zUajUVFhaSs7OzXj6uglUHDx6ktLQ0ioyMpN69e9OpU6do1apVpFarafjw4TRp0iTJrsLCQjI3Nyd7e3siIvr5558pKSlJMwbTp0/X67EUunB3d6djx46Rh4eH3j9bWFhIKpWKrK2ttY5XV1fThQsXmu05L8+fP6f09HSytbWt89C8yspK2rVrFyUkJDTYb2pqSpmZmeTl5WVoqLLhv//9L+3bt0/nGoiJiXnjxbX10ZjzoyHztjHWE+d6J/rzFFaUFc32Rdb/AJyFnPbs2QMjIyPY2dnB2toaJ06cgEqlQv/+/REZGQkjIyNs27ZNcmxhYWHYvXs3gJe3MZqZmcHX1xcfffQR/P39YWlpifPnz0ty/fjjjzpfRkZGWLdunea9FB4/fowRI0bA3NwcDg4OWLhwoVb9iOLiYiiVSsm/J2fBqqSkJBgbGyMgIACtWrXC999/DxsbG0yYMAGTJ0+GhYUFVq9eLTm2wMBAzcWb+/btg1KpxNChQzF37lzExsbCxMTkjRd3vsqaNWt0voyMjDB//nzNeyncu3cPPXv2hFKphJGREcaOHYvy8nJNu75jwMlvv/2mGTulUonQ0FAUFRU1KLZZs2bpfCmVSiQkJGjeSyU9PR23bt3SvN+yZQtCQkLQsWNH9O7dGzt27JD+iwJYu3Ytxo4dq/m5LVu2wMvLC++++y7mz5+v84YBXXDWnAF45wfnvOVcTwDvem+MwooCaYhNjAFwFnJ67733sHTpUgDAjh07oFKp8OWXX2raV61aBT8/P8mxtWrVCteuXQPwckPzerJesGABevfuLclV+4HyeiXKV19Sk9pnn32GLl26YPfu3di4cSNcXFwwaNAgTYXS4uJiKBQKyb8nZ8Eqb29vfPfddwCAU6dOwdzcHOvXr9e0Jycnw8vLS3JsVlZWmg+9oKAgJCYmarWvXbsW/v7+klwKhQIdO3aEq6ur1kuhUKBDhw5wdXWFm5ubJFdCQgKCgoJw6dIlnDhxAgEBAejRowf++OMPAPqPASfDhg3DoEGDUFpaiuvXr2PQoEFwc3PDnTt3NLFJnWsKhQJ+fn4IDw/XeikUCvTs2RPh4eHo16+f5Nh8fX1x4sQJAC8rdVtYWOCzzz7Dhg0bMHPmTFhbW2PTpk2SXEuWLIGNjQ3i4uLg6OiIxMRE2NnZYenSpVi2bBnatm2LRYsWSXJx1pwBeOcH57zlXE8A73rnLsQnkI7YxBgAZyEnKysr5OfnAwDUajVMTEyQlZWlab9586ZehZysrKyQm5sL4GUhpoyMDK32GzduSPbV3h74+l8SxsbGyM7OlhwT8LJAYGpqquZ9aWkpAgMDERERgcrKSr3PAnAWrLKwsNB8WAKAiYmJ1t1l+fn5sLS0lBxb69atkZmZ
CQBwcHDQ/HctN27ckOybPHky/Pz8kJOTo3W8IWPQvn17XLx4UfO+srISQ4YMgZ+fHx4+fNisZ2IcHBy05r1arcaUKVPg7OyMmzdv6hXbP/7xD7i5ueHkyZNaxxvSZ8DL+XH79m0AgL+/v+YDsJZt27bB29tbkuudd97Bnj17AAAZGRkwMjLC1q1bNe0pKSno3Lmz5LjedBdkVlYWLCwsJLkA3vnBOW851xPAu965C/EJpCMulzYAzkJONjY29PDhQyIiKisro5qaGs17IqKHDx/W+W76TQQFBdGBAweIiOidd96hzMxMrfaMjAyytbWV5Dpy5Ah98MEH1KNHDzp48KDkGHRRWlqqdQ2Nvb09/fTTT1ReXk4DBw6kiooKvXycBavs7Ozozp07RER07949qqmpoYKCAk37nTt3JPcZEVFYWBjt2LGDiIj8/f3p9OnTWu2pqanUoUMHSa6kpCRatGgRRUZG0rp16yTHoIvHjx9TmzZtNO/NzMwoJSWFXF1dqV+/flRSUmKQ3xCeP39Oxsb/X4NToVDQhg0baMiQIRQWFkbXrl2T7Jo3bx7t3LmTpk6dSrNnz6bq6mqDYrO0tNQUrywqKqLAwECt9qCgIMrPz5fkunfvHvXo0YOIiLp3705KpZL8/Pw07e+99x7du3dPkkulUtHt27frbb99+zapVCpJLiLe+cE5bznXExHveucuxCfQg+beRbVkOAs5jRkzBkFBQdi6dSuGDBmCyMhIBAcHIzc3F3l5eQgLC9PrdOT58+fRunVrLF68GGvXroW9vT0WLFiAbdu2YdGiRVCpVFi+fLlkHwD8+uuv8Pb2xqRJk/Ds2bMG/TX17rvv4tChQ3WOl5eXo1evXujevbteZwE4C1ZNnz4dHh4eWLp0KQIDAzFu3Dh4enriyJEjOHr0KHx8fPDJJ59Iji0nJwd2dnZISEjAkiVLYG1tjTFjxuDvf/87EhISYGZmhuTkZMk+ACgsLMT777+PqKgo3L9/v0Fj4OPjg//85z91jldXV2PYsGFwdnZutjMxPXv2xJYtW3S2TZ8+HSqVSu/YysvLkZCQAF9fX1y9ehUmJiYNOhMzZswYjB8/HgAwYsQILFiwQKt92bJl8PHxkeRyc3PDkSNHAADXrl2DUqnErl27NO2HDh2Cq6urJBdnzRmgceYHx7zlXk+c6527EJ9AOmITYwCchZyKi4sxYMAAWFtbIzIyEmVlZZgxY4bmehMPDw/cuHFDr/jOnz+P4ODgOtevdOjQQa8LVF+loqICkydPhoeHB4yMjPRORJ9++mm9m7EnT54gKChIrwTJWbDq6dOnmDhxIrp164ZJkyahqqoKK1euhKmpKRQKBcLDw/W+OO/GjRuIj4+HjY2Npv9NTEwQEhKCvXv36uWqRa1WY9myZXB0dGzQGMyZM6feaySqq6sxdOjQZtvELFu2DNHR0fW2T506tcHX6+zYsQPt2rWDUqls0CamqKgIrq6uCA0Nxeeffw4LCwv06dMHEydORGhoKExNTXVu0HWxYMECtG3bFhMmTICbmxvmzZsHZ2dnbNiwAUlJSejUqZNeFx0nJibCyclJky9qr2FzcnLS+4+Vxpofhs5bgHc9ca537kJ8AumIW6wZaMyiYbdu3aKKigry9PTUOs2uD6WlpXTr1i1Sq9Xk5ORErq6uBse1f/9+Sk1Npfnz55ODg4Pkn3v06BHdu3ePunbtqrO9vLycrly5QmFhYXrFw10A61UqKyupurqabGxsGuwAQCUlJaRWq8ne3l5z67shXLlyhX7++WdKSEjQOv3/NmpqaqiioqLevqmpqaGioqIG3TovdwoLCyk9PZ369+9PVlZWev98WVkZJSYm0oEDB7TWVO/evWnWrFmar4jehlqtpsTERLpw4QKFhIRovvqaM2cOVVRU0JAhQ2jdunV6x8hRc6ax50d6ejqdPXtW73n7Ko2xnmoxZL03Zh4S6EZsYgSCN3Du3Dnq0aMH2/fZnD65ugQC
gaCpEBf2MlBYWEhPnz6tc7y6uprOnDnD8m88ePCAvvzySxYXt0+uLg5fdHQ0FRUVscXD6ZOrSyAv7t69S5988oksfXJ1NcT3/PlzOnv2LOXk5NRpq6yspC1btrDFJniF5vwuq6XTlEXDMjIyWK9T4PTJ1cXhs7a2xs2bN9ni4fTJ1SWQF3JbUy3Bpa+Ps0ijQD8adpGFgIhe3sKpVCrp4sWLVFZWRvPmzaN+/frR8ePHNd/1QuK3dVlZWW9s/+233/SKjdMnV1dj+ASClsb+/fvf2H7r1q1m88nVxe2bO3cudevWjS5fvkxlZWU0c+ZM6tOnD50+fVrvR6gI9ENcE2MAHTp0oL1792rqRVRVVdGIESPo7t27dPLkSaqurqb27dvTixcv3upSKpWkUCh0bnpqjysUCkkubp9cXY3he53t27dTTExMgy4CbWyfXF2CpuVNa6AWrjWlr0+uLm5fu3bt6KeffiIfHx8ievnH67Rp0+jw4cOUmppKVlZWkj8LBPohrokxAM6iULa2trRx40bKz8+v87p165beReY4fXJ1NYbvdUaNGsX6wc7pk6tL0LQ4OTlRSkoKqdVqna8rV640m0+uLm4fZ5FGgX6Ir5MMwN3dnbKysrSexGpsbEy7d++mESNG0ODBgyW7AgIC6N69e/XetlhWVib5qylun1xdjeETCFoaAQEBlJ6eTjExMTrb33a2oTF9cnVx+zw9Peny5ct1noxeW6V46NChkuMS6IfYxBhAdHQ0fffddxQXF6d1vHYjExcXR4WFhZJcU6ZMoWfPntXb7uzsTMnJyZJj4/TJ1dUYPoGgpfHFF1+8cQ107tyZUlNTm8UnVxe3LzY2lnbs2EFjx46t07Zu3TpSq9WUlJQkOTaBdMQ1MQbwZy4aJhAIBAJBcyOuiTEAY2PjOhuYc+fOUVVVlabdkA3Mqy4OOH1ydTWGTyAQCATyRJyJYaZVq1aUkZFB7u7usnJx++TqagyfQCAQCOSJOBPDDOeekHt/KdfY5Px7CgQCgUC+iE2MQCAQCASCFonYxDDz7bffUrt27WTn4vbJ1dUYPoFAIBDIE3FNjEAgEAgEghaJOBMjEAgEAoGgRSI2MQKBQCAQCFokYhMjEAgEAoGgRSI2MQKBQCAQCFokYhMjEAgEAoGgRSI2MQKBQCAQCFokYhMjEAgEAoGgRSI2MQKBQCAQCFok/wdX6/kNc1K83gAAAABJRU5ErkJggg==",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "f, ax = plt.subplots()\n",
- "sbn.heatmap(np.log2(result[0]))\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Combining snipping strategies\n",
- "Multiple snipping strategies can be used and dispatched together."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Snip ICCF and Obs/Exp together"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 38,
- "metadata": {},
- "outputs": [],
- "source": [
- "snip_strat_center_obs_exp = Triplet1DSnippingStrategy(bin_size=100_000,\n",
- " half_window_size=2_000_000,\n",
- " snipping_value=SnippingValues.OBSEXP) "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 39,
- "metadata": {},
- "outputs": [],
- "source": [
- "snipper = Snipper([snip_strat_center, snip_strat_center_obs_exp])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 40,
- "metadata": {},
- "outputs": [],
- "source": [
- "result = snipper.snip(\"../playground/test.parquet\", tad_boundaries, threads=40)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 45,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "f, ax = plt.subplots(1, 2)\n",
- "sbn.heatmap(np.log2(result[0]), ax=ax[0], square=True)\n",
- "ax[0].set_title(\"ICCF\")x\n",
- "sbn.heatmap(np.log2(result[1]), ax=ax[1], cmap=\"RdBu_r\", vmin=-0.5, vmax=0.5, square=True)\n",
- "ax[1].set_title(\"Obs/Exp\")\n",
- "f.set_size_inches(10, 6)\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Generate triplet strategies with different offsets"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 50,
- "metadata": {},
- "outputs": [],
- "source": [
- "offset_strategies = [\n",
- " Triplet1DSnippingStrategy(bin_size=100_000,\n",
- " half_window_size=2_000_000,\n",
- " snipping_value=SnippingValues.ICCF,\n",
- " relative_offset=offset)\n",
- " for offset in np.arange(-1_000_000, 1_000_001, 500_000)\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 52,
- "metadata": {},
- "outputs": [],
- "source": [
- "snipper = Snipper(offset_strategies)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 53,
- "metadata": {},
- "outputs": [],
- "source": [
- "result = snipper.snip(\"../playground/test.parquet\", tad_boundaries, threads=40)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 57,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "f, ax = plt.subplots(1, len(result))\n",
- "for index, array in enumerate(result):\n",
- " sbn.heatmap(np.log2(result[index]), ax=ax[index], square=True)\n",
- "f.set_size_inches(20, 5)\n",
- "plt.show()"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3.8.0 ('spoc-dev')",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.0"
- },
- "orig_nbformat": 4,
- "vscode": {
- "interpreter": {
- "hash": "ac12263fcb70de6040950844f8173d0af40e7d81fd3cab43722576f4e93bb274"
- }
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/setup.py b/setup.py
index 6df90e0..164f19d 100644
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python
-
"""The setup script."""
-
-from setuptools import setup, find_packages
+from setuptools import find_packages
+from setuptools import setup
requirements = [
@@ -17,7 +16,7 @@
"ipykernel",
"matplotlib>=3.5.3",
"seaborn>=0.11.2",
- "duckdb==0.3.4",
+ "duckdb==0.9.1",
"sparse==0.13.0",
"numba>=0.57.0",
]
diff --git a/spoc/cli.py b/spoc/cli.py
index a1c1326..01b627d 100644
--- a/spoc/cli.py
+++ b/spoc/cli.py
@@ -1,11 +1,15 @@
"""Console script for spoc."""
import sys
+
import click
+
from spoc.contacts import ContactManipulator
-from spoc.fragments import FragmentAnnotator, FragmentExpander
+from spoc.contacts import Contacts
+from spoc.fragments import FragmentAnnotator
+from spoc.fragments import FragmentExpander
from spoc.io import FileManager
+from spoc.models.dataframe_models import DataMode
from spoc.pixels import GenomicBinner
-from spoc.contacts import Contacts
@click.group()
@@ -85,7 +89,7 @@ def bin_contacts(
"""
# load data from disk
- file_manager = FileManager(use_dask=True)
+ file_manager = FileManager(DataMode.DASK)
contacts = Contacts.from_uri(contact_path)
# binning
binner = GenomicBinner(bin_size=bin_size)
@@ -110,7 +114,7 @@ def merge_contacts(contact_paths, output):
contact_paths (tuple): Paths to the input contact files.
output (str, optional): Path to the output merged contact file.
"""
- file_manager = FileManager(use_dask=True)
+ file_manager = FileManager(DataMode.DASK)
# get list of parameters
parameters = [file_manager.list_contacts(p) for p in contact_paths]
# get parameter counts -> if count > 1 then we need to concatenate
diff --git a/spoc/contacts.py b/spoc/contacts.py
index 32b4ed4..0f08941 100644
--- a/spoc/contacts.py
+++ b/spoc/contacts.py
@@ -1,12 +1,22 @@
"""Managing multi-way contacts."""
-
from __future__ import annotations # needed for self reference in type hints
-from itertools import permutations, product
-from typing import List, Optional, Dict
-import pandas as pd
+
+from itertools import permutations
+from itertools import product
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Tuple
+
import dask.dataframe as dd
+import duckdb
import numpy as np
-from spoc.models.dataframe_models import ContactSchema, DataFrame
+import pandas as pd
+
+from spoc.models.dataframe_models import ContactSchema
+from spoc.models.dataframe_models import DataFrame
+from spoc.models.dataframe_models import DataMode
+from spoc.models.dataframe_models import GenomicDataSchema
from spoc.models.file_parameter_models import ContactsParameters
@@ -51,10 +61,15 @@ def __init__(
number_fragments=self.number_fragments,
contains_metadata=self.contains_metadata,
)
+ # TODO: make this work for duckdb pyrelation -> switch to mode
if isinstance(contact_frame, pd.DataFrame):
- self.is_dask = False
+ self.data_mode = DataMode.PANDAS
+ elif isinstance(contact_frame, dd.DataFrame):
+ self.data_mode = DataMode.DASK
+ elif isinstance(contact_frame, duckdb.DuckDBPyRelation):
+ self.data_mode = DataMode.DUCKDB
else:
- self.is_dask = True
+ raise ValueError("Unknown data mode!")
self._data = self._schema.validate(contact_frame)
self.metadata_combi = metadata_combi
self.label_sorted = label_sorted
@@ -62,7 +77,7 @@ def __init__(
self.symmetry_flipped = symmetry_flipped
@staticmethod
- def from_uri(uri, mode="pandas"):
+ def from_uri(uri, mode=DataMode.PANDAS):
"""Construct contacts from uri.
Will match parameters based on the following order:
@@ -76,7 +91,7 @@ def from_uri(uri, mode="pandas"):
# import here to avoid circular imports
from spoc.io import FileManager
- return FileManager(use_dask=mode == "dask").load_contacts(uri)
+ return FileManager(mode).load_contacts(uri)
def get_global_parameters(self) -> ContactsParameters:
"""Returns global parameters"""
@@ -88,6 +103,10 @@ def get_global_parameters(self) -> ContactsParameters:
symmetry_flipped=self.symmetry_flipped,
)
+ def get_schema(self) -> GenomicDataSchema:
+ """Returns the schema of the underlying data"""
+ return self._schema
+
def _guess_number_fragments(self, contact_frame: DataFrame) -> int:
"""Guesses the number of fragments from the contact frame"""
return max(int(i.split("_")[1]) for i in contact_frame.columns if "start" in i)
@@ -99,22 +118,26 @@ def get_label_values(self) -> List[str]:
raise ValueError("Contacts do not contain metadata!")
output = set()
for i in range(self.number_fragments):
- if self.is_dask:
+ if self.data_mode == DataMode.DASK:
output.update(self.data[f"metadata_{i+1}"].unique().compute())
- else:
+ elif self.data_mode == DataMode.PANDAS:
output.update(self.data[f"metadata_{i+1}"].unique())
- return output
+ else:
+ raise ValueError("Label values not supported for duckdb!")
+ return list(output)
def get_chromosome_values(self) -> List[str]:
"""Returns all chromosome values"""
# TODO: This could be put in global metadata of parquet file
output = set()
for i in range(self.number_fragments):
- if self.is_dask:
+ if self.data_mode == DataMode.DASK:
output.update(self.data[f"chrom_{i+1}"].unique().compute())
- else:
+ elif self.data_mode == DataMode.PANDAS:
output.update(self.data[f"chrom_{i+1}"].unique())
- return output
+ else:
+ raise ValueError("Chromosome values not supported for duckdb!")
+ return list(output)
@property
def data(self):
@@ -150,18 +173,22 @@ def merge_contacts(self, merge_list: List[Contacts]) -> Contacts:
# validate that merge is possible
if len({i.number_fragments for i in merge_list}) != 1:
raise ValueError("All contacts need to have the same order!")
- if len({i.is_dask for i in merge_list}) != 1:
- raise ValueError("Mixture of dask and pandas dataframes is not supported!")
+ if len({i.data_mode for i in merge_list}) != 1:
+ raise ValueError("Mixture of dataframes is not supported!")
# TODO: assert all have same labelling state
number_fragments = merge_list[0].number_fragments
- if merge_list[0].is_dask:
+ if merge_list[0].data_mode == DataMode.DASK:
return Contacts(
dd.concat([i.data for i in merge_list]),
number_fragments=number_fragments,
)
- return Contacts(
- pd.concat([i.data for i in merge_list]), number_fragments=number_fragments
- )
+ elif merge_list[0].data_mode == DataMode.PANDAS:
+ return Contacts(
+ pd.concat([i.data for i in merge_list]),
+ number_fragments=number_fragments,
+ )
+ else:
+ raise ValueError("Merging duckdb relations is not supported!")
@staticmethod
def _generate_rename_columns(order, start_index=1):
@@ -317,13 +344,15 @@ def sort_labels(self, contacts: Contacts) -> Contacts:
)
)
# determine which method to use for concatenation
- if contacts.is_dask:
+ if contacts.data_mode == DataMode.DASK:
# this is a bit of a hack to get the index sorted. Dask does not support index sorting
result = (
dd.concat(subsets).reset_index().sort_values("index").set_index("index")
)
- else:
+ elif contacts.data_mode == DataMode.PANDAS:
result = pd.concat(subsets).sort_index()
+ else:
+ raise ValueError("Sorting labels for duckdb relations is not implemented.")
return Contacts(
result, number_fragments=contacts.number_fragments, label_sorted=True
)
@@ -365,7 +394,7 @@ def _sort_chromosomes(self, df: DataFrame, number_fragments: int) -> DataFrame:
def _generate_binary_label_mapping(
self, label_values: List[str], number_fragments: int
- ) -> Dict[str, str]:
+ ) -> Dict[Tuple[str, ...], Tuple[str, ...]]:
sorted_labels = sorted(label_values)
mapping = {}
for i in range(number_fragments + 1):
@@ -422,13 +451,17 @@ def equate_binary_labels(self, contacts: Contacts) -> Contacts:
subset[f"metadata_{i+1}"] = j
subsets.append(subset)
# determine which method to use for concatenation
- if contacts.is_dask:
+ if contacts.data_mode == DataMode.DASK:
# this is a bit of a hack to get the index sorted. Dask does not support index sorting
result = (
dd.concat(subsets).reset_index().sort_values("index").set_index("index")
)
- else:
+ elif contacts.data_mode == DataMode.PANDAS:
result = pd.concat(subsets).sort_index()
+ else:
+ raise ValueError(
+ "Equate binary labels for duckdb relations is not implemented."
+ )
return Contacts(
result,
number_fragments=contacts.number_fragments,
diff --git a/spoc/fragments.py b/spoc/fragments.py
index a2ae4cd..519d3a3 100644
--- a/spoc/fragments.py
+++ b/spoc/fragments.py
@@ -1,14 +1,18 @@
"""This part of spoc is responsible for dealing aligned fragments that
have not yet been converted to contacts. It deals with label information
as well as expanding fragments to contacts."""
-
-from typing import Dict, Union
from itertools import combinations
-import pandas as pd
+from typing import Dict
+from typing import Union
+
import dask.dataframe as dd
import numpy as np
-from .models.dataframe_models import FragmentSchema, ContactSchema, DataFrame
+import pandas as pd
+
from .contacts import Contacts
+from .models.dataframe_models import ContactSchema
+from .models.dataframe_models import DataFrame
+from .models.dataframe_models import FragmentSchema
class Fragments:
diff --git a/spoc/io.py b/spoc/io.py
index 15c8a9b..0e9cb77 100644
--- a/spoc/io.py
+++ b/spoc/io.py
@@ -1,35 +1,50 @@
"""Persisting functionality of spoc that manages writing to and reading from the filesystem."""
-
+import json
+import os
import pickle
-from typing import Dict, Union, List, Optional, Tuple
+from functools import partial
from hashlib import md5
-import os
-import json
from pathlib import Path
-import pandas as pd
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Tuple
+from typing import Union
+
import dask.dataframe as dd
+import duckdb
+import pandas as pd
+
from spoc.contacts import Contacts
-from spoc.pixels import Pixels
-from spoc.models.file_parameter_models import (
- ContactsParameters,
- PixelParameters,
- GlobalParameters,
-)
from spoc.fragments import Fragments
+from spoc.models.dataframe_models import DataMode
+from spoc.models.file_parameter_models import ContactsParameters
+from spoc.models.file_parameter_models import GlobalParameters
+from spoc.models.file_parameter_models import PixelParameters
+from spoc.pixels import Pixels
+
+# Instantiate one duckdb connection to be used for all duckdb relations
+DUCKDB_CONNECTION = duckdb.connect(database=":memory:")
class FileManager:
"""Is responsible for loading and writing files
Args:
- use_dask (bool, optional): Whether to use Dask for reading Parquet files. Defaults to False.
+ data_mode (DataMode, optional): Data mode. Defaults to DataMode.PANDAS.
"""
- def __init__(self, use_dask: bool = False) -> None:
- if use_dask:
+ def __init__(self, data_mode: DataMode = DataMode.PANDAS) -> None:
+ if data_mode == DataMode.DUCKDB:
+ self._parquet_reader_func = partial(
+ duckdb.read_parquet, connection=DUCKDB_CONNECTION
+ )
+ elif data_mode == DataMode.DASK:
self._parquet_reader_func = dd.read_parquet
- else:
+ elif data_mode == DataMode.PANDAS:
self._parquet_reader_func = pd.read_parquet
+ else:
+ raise ValueError(f"Data mode {data_mode} not supported!")
@staticmethod
def write_label_library(path: str, data: Dict[str, bool]) -> None:
@@ -163,17 +178,17 @@ def _parse_uri(
Tuple(str, Dict[str, str]): Tuple containing the path and a dictionary of parameters.
"""
# parse uri
- uri = uri.split("::")
+ uri_arguments = uri.split("::")
# validate uri
- if len(uri) < min_fields:
+ if len(uri_arguments) < min_fields:
raise ValueError(
f"Uri: {uri} is not valid. Must contain at least Path, number_fragments and binsize"
)
- params = dict(zip(uri_parameters, uri[1:]))
+ params = dict(zip(uri_parameters, uri_arguments[1:]))
# rewrite metadata_combi parameter
if "metadata_combi" in params.keys() and params["metadata_combi"] != "None":
params["metadata_combi"] = str(tuple(params["metadata_combi"]))
- return uri[0], params
+ return uri_arguments[0], params
def _fuzzy_match_parameters(
self,
@@ -207,10 +222,7 @@ def _fuzzy_match_parameters(
return matched_parameters[0]
def load_pixels(
- self,
- path: str,
- global_parameters: Optional[PixelParameters] = None,
- load_dataframe: bool = True,
+ self, path: str, global_parameters: Optional[PixelParameters] = None
) -> Pixels:
"""Loads specific pixels instance based on global parameters.
load_dataframe specifies whether the dataframe should be loaded, or whether pixels
@@ -219,7 +231,6 @@ def load_pixels(
Args:
path (str): Path to the pixel data.
global_parameters (PixelParameters): Global parameters.
- load_dataframe (bool, optional): Whether to load the dataframe. Defaults to True.
Returns:
Pixels: Pixels object containing the pixel data.
@@ -242,11 +253,8 @@ def load_pixels(
parsed_parameters, metadata
)
# rewrite path to contain parent folder
- pixel_path = Path(path) / pixel_path
- if load_dataframe:
- df = self._parquet_reader_func(pixel_path)
- else:
- df = pixel_path
+ full_pixel_path = Path(path) / pixel_path
+ df = self._parquet_reader_func(full_pixel_path)
return Pixels(df, **matched_parameters.dict())
def load_contacts(
@@ -278,8 +286,8 @@ def load_contacts(
parsed_parameters, metadata
)
# rewrite path to contain parent folder
- contacts_path = Path(path) / contacts_path
- df = self._parquet_reader_func(contacts_path)
+ full_contacts_path = Path(path) / contacts_path
+ df = self._parquet_reader_func(full_contacts_path)
return Contacts(df, **matched_parameters.dict())
@staticmethod
diff --git a/spoc/models/dataframe_models.py b/spoc/models/dataframe_models.py
index 4633d40..af42ff4 100644
--- a/spoc/models/dataframe_models.py
+++ b/spoc/models/dataframe_models.py
@@ -1,14 +1,22 @@
"""Dataframe models"""
-
-from typing import Iterable, Union, Dict
import copy
-import pandera as pa
-import pandas as pd
+from enum import auto
+from enum import Enum
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Optional
+from typing import Protocol
+from typing import Union
+
import dask.dataframe as dd
+import duckdb
+import pandas as pd
+import pandera as pa
# Define dataframe type
-DataFrame = Union[pd.DataFrame, dd.DataFrame]
+DataFrame = Union[pd.DataFrame, dd.DataFrame, duckdb.DuckDBPyRelation]
FragmentSchema = pa.DataFrameSchema(
{
@@ -29,6 +37,106 @@
coerce=True,
)
+RegionSchema = pa.DataFrameSchema(
+ {
+ "region_id": pa.Column(),
+ "region_chrom": pa.Column(str),
+ "region_start": pa.Column(int),
+ "region_end": pa.Column(int),
+ },
+ coerce=True,
+ unique=["region_id"],
+)
+
+# Protocol for genomic data
+
+
+class GenomicDataSchema(Protocol):
+ """Protocol for genomic data schema
+ to be used in the query engine"""
+
+ def get_position_fields(self) -> Dict[int, List[str]]:
+ """Returns the position fields as a dictionary
+ of fragment index to the respective fields"""
+
+ def get_contact_order(self) -> int:
+ """Returns the order of the genomic data"""
+
+ def get_schema(self) -> pa.DataFrameSchema:
+ """Return the schema of the underlying data"""
+
+ def get_binsize(self) -> Optional[int]:
+ """Returns the binsize of the genomic data"""
+
+ def get_region_number(self) -> Optional[int]:
+ """Returns the number of regions in the genomic data
+ if present."""
+
+ def get_half_window_size(self) -> Optional[int]:
+ """Returns the half window size of the genomic data
+ if present."""
+
+
+class QueryStepDataSchema:
+ """Implements GenomicDataSchema for query steps
+ with generic columns"""
+
+ # pylint: disable=too-many-arguments
+ # arguments needed to define the schema
+ def __init__(
+ self,
+ columns: List[str],
+ position_fields: Dict[int, List[str]],
+ contact_order: int,
+ binsize: Optional[int] = None,
+ region_number: Optional[int] = None,
+ half_window_size: Optional[int] = None,
+ ) -> None:
+ self._columns = columns
+ self._contact_order = contact_order
+ self._position_fields = position_fields
+ self._binsize = binsize
+ self._region_number = region_number
+ self._half_window_size = half_window_size
+ self._schema = pa.DataFrameSchema(
+ {column: pa.Column() for column in columns},
+ coerce=True,
+ )
+
+ def get_position_fields(self) -> Dict[int, List[str]]:
+ """
+ Returns the position fields as a dictionary.
+
+ Returns:
+ A dictionary where the keys are integers representing positions
+ and the values are lists of strings representing the fields.
+ """
+ return self._position_fields
+
+ def get_contact_order(self) -> int:
+ """
+ Returns the contact order of the object.
+ """
+ return self._contact_order
+
+ def get_schema(self) -> pa.DataFrameSchema:
+ """Return the schema of the underlying data"""
+ return self._schema
+
+ def get_binsize(self) -> Optional[int]:
+ """Returns the binsize of the genomic data"""
+ return self._binsize
+
+ def get_region_number(self) -> Optional[int]:
+ """Returns the number of regions in the genomic data
+ if present."""
+ return self._region_number
+
+ def get_half_window_size(self) -> Optional[int]:
+ """Returns the half window size of the genomic data
+ if present."""
+ return self._half_window_size
+
# schemas for higher order contacts
@@ -113,6 +221,27 @@ def validate_header(self, data_frame: DataFrame) -> None:
self._schema, data_frame, "Header is invalid!"
)
+ def get_schema(self) -> pa.DataFrameSchema:
+ """
+ Get the schema of the DataFrame.
+
+ Returns:
+ pa.DataFrameSchema: The schema of the DataFrame.
+ """
+ return self._schema
+
+ def get_position_fields(self) -> Dict[int, List[str]]:
+ """Returns the position fields as a dictionary
+ of fragment index to the respective fields"""
+ return {
+ i: [f"chrom_{i}", f"start_{i}", f"end_{i}"]
+ for i in range(1, self._number_fragments + 1)
+ }
+
+ def get_contact_order(self) -> int:
+ """Returns the order of the genomic data"""
+ return self._number_fragments
+
def validate(self, data_frame: DataFrame) -> DataFrame:
"""Validate multiway contact dataframe
@@ -120,8 +249,25 @@ def validate(self, data_frame: DataFrame) -> DataFrame:
data_frame (DataFrame): The DataFrame to validate.
"""
self.validate_header(data_frame)
+ if isinstance(data_frame, duckdb.DuckDBPyRelation):
+ # duckdb does not support schema validation
+ return data_frame
return self._schema.validate(data_frame)
+ def get_binsize(self) -> Optional[int]:
+ """Returns the binsize of the genomic data"""
+ return None
+
+ def get_region_number(self) -> Optional[int]:
+ """Returns the number of regions in the genomic data
+ if present."""
+ return None
+
+ def get_half_window_size(self) -> Optional[int]:
+ """Returns the half window size of the genomic data
+ if present."""
+ return None
+
class PixelSchema:
"""Dynamic schema for N-way pixels
@@ -131,9 +277,15 @@ class PixelSchema:
same_chromosome (bool, optional): Whether the fragments are on the same chromosome. Defaults to True.
"""
- def __init__(self, number_fragments: int = 3, same_chromosome: bool = True) -> None:
+ def __init__(
+ self,
+ number_fragments: int = 3,
+ same_chromosome: bool = True,
+ binsize: Optional[int] = None,
+ ) -> None:
self._number_fragments = number_fragments
self._same_chromosome = same_chromosome
+ self._binsize = binsize
self._schema = pa.DataFrameSchema(
dict(
self._get_constant_fields(),
@@ -182,6 +334,46 @@ def validate_header(self, data_frame: DataFrame) -> None:
self._schema, data_frame, "Header is invalid!"
)
+ def get_schema(self) -> pa.DataFrameSchema:
+ """
+ Get the schema of the DataFrame.
+
+ Returns:
+ pa.DataFrameSchema: The schema of the DataFrame.
+ """
+ return self._schema
+
+ def get_position_fields(self) -> Dict[int, List[str]]:
+ """Returns the position fields as a dictionary
+ of fragment index to the respective fields"""
+ if self._same_chromosome:
+ return {
+ i: ["chrom", f"start_{i}"] for i in range(1, self._number_fragments + 1)
+ }
+ else:
+ return {
+ i: [f"chrom_{i}", f"start_{i}"]
+ for i in range(1, self._number_fragments + 1)
+ }
+
+ def get_binsize(self) -> Optional[int]:
+ """Returns the binsize of the genomic data"""
+ return self._binsize
+
+ def get_region_number(self) -> Optional[int]:
+ """Returns the number of regions in the genomic data
+ if present."""
+ return None
+
+ def get_contact_order(self) -> int:
+ """Returns the order of the genomic data"""
+ return self._number_fragments
+
+ def get_half_window_size(self) -> Optional[int]:
+ """Returns the half window size of the genomic data
+ if present."""
+ return None
+
def validate(self, data_frame: DataFrame) -> DataFrame:
"""Validate multiway contact dataframe
@@ -189,4 +381,16 @@ def validate(self, data_frame: DataFrame) -> DataFrame:
data_frame (DataFrame): The DataFrame to validate.
"""
+ self.validate_header(data_frame)
+ if isinstance(data_frame, duckdb.DuckDBPyRelation):
+ # duckdb does not support schema validation
+ return data_frame
return self._schema.validate(data_frame)
+
+
+class DataMode(Enum):
+ """Enum for data mode"""
+
+ PANDAS = auto()
+ DASK = auto()
+ DUCKDB = auto()
diff --git a/spoc/models/file_parameter_models.py b/spoc/models/file_parameter_models.py
index 38c072b..c89405e 100644
--- a/spoc/models/file_parameter_models.py
+++ b/spoc/models/file_parameter_models.py
@@ -1,7 +1,10 @@
"""This file contains data classes for parameters
of spoc data structures"""
-from typing import Optional, Tuple, List
-from pydantic import BaseModel, Field
+from typing import List
+from typing import Optional
+from typing import Tuple
+
+from pydantic import BaseModel
class GlobalParameters(BaseModel):
@@ -15,6 +18,7 @@ class GlobalParameters(BaseModel):
@classmethod
def get_uri_fields(cls) -> List[str]:
+ """Returns the fields that should be included in the URI"""
raise NotImplementedError
def __hash__(self) -> int:
@@ -35,6 +39,7 @@ class ContactsParameters(GlobalParameters):
@classmethod
def get_uri_fields(cls) -> List[str]:
+ """Returns the fields that should be included in the URI"""
# Specific parameters needed to enforce order
return [
"number_fragments",
@@ -53,6 +58,7 @@ class PixelParameters(GlobalParameters):
@classmethod
def get_uri_fields(cls) -> List[str]:
+ """Returns the fields that should be included in the URI"""
# Specific parameters needed to enforce order
return [
"number_fragments",
diff --git a/spoc/pixels.py b/spoc/pixels.py
index 4d6e8d3..ff4d9cc 100644
--- a/spoc/pixels.py
+++ b/spoc/pixels.py
@@ -1,12 +1,19 @@
"""This part of spoc is responsible for binned,
higher order contacts in the form of 'genomic pixels'"""
-from pathlib import Path
-from typing import Union, Optional, List
-import pandas as pd
+from functools import partial
+from typing import List
+from typing import Optional
+
import dask.dataframe as dd
-from spoc.models.dataframe_models import PixelSchema, DataFrame
-from spoc.models.file_parameter_models import PixelParameters
+import duckdb
+import pandas as pd
+
from spoc.contacts import Contacts
+from spoc.models.dataframe_models import DataFrame
+from spoc.models.dataframe_models import DataMode
+from spoc.models.dataframe_models import GenomicDataSchema
+from spoc.models.dataframe_models import PixelSchema
+from spoc.models.file_parameter_models import PixelParameters
class Pixels:
@@ -38,9 +45,9 @@ class Pixels:
def __init__(
self,
- pixel_source: Union[pd.DataFrame, dd.DataFrame, str],
- number_fragments: Optional[int] = None,
- binsize: Optional[int] = None,
+ pixel_source: DataFrame,
+ number_fragments: int,
+ binsize: int,
metadata_combi: Optional[List[str]] = None,
label_sorted: bool = False,
binary_labels_equal: bool = False,
@@ -51,7 +58,9 @@ def __init__(
can be a pandas or dask dataframe or a path. Caveat is that
if pixels are a path, source data is not validated."""
self._schema = PixelSchema(
- number_fragments=number_fragments, same_chromosome=same_chromosome
+ number_fragments=number_fragments,
+ same_chromosome=same_chromosome,
+ binsize=binsize,
)
self._same_chromosome = same_chromosome
self._number_fragments = number_fragments
@@ -60,18 +69,19 @@ def __init__(
self._symmetry_flipped = symmetry_flipped
self._metadata_combi = metadata_combi
self._label_sorted = label_sorted
- if isinstance(pixel_source, (pd.DataFrame, dd.DataFrame)):
- self._data = self._schema.validate(pixel_source)
- self._path = None
+ # get data mode
+ if isinstance(pixel_source, pd.DataFrame):
+ self.data_mode = DataMode.PANDAS
+ elif isinstance(pixel_source, dd.DataFrame):
+ self.data_mode = DataMode.DASK
+ elif isinstance(pixel_source, duckdb.DuckDBPyRelation):
+ self.data_mode = DataMode.DUCKDB
else:
- # check whether path exists
- if not Path(pixel_source).exists():
- raise ValueError(f"Path: {pixel_source} does not exist!")
- self._path = Path(pixel_source)
- self._data = None
+ raise ValueError("Unknown data mode!")
+ self._data = self._schema.validate(pixel_source)
@staticmethod
- def from_uri(uri, mode="path") -> "Pixels":
+ def from_uri(uri, mode=DataMode.PANDAS) -> "Pixels":
"""Construct pixels from uri.
Will match parameters based on the following order:
@@ -95,19 +105,7 @@ def from_uri(uri, mode="path") -> "Pixels":
# pylint: disable=import-outside-toplevel
from spoc.io import FileManager
- # get read mode
- if mode == "path":
- load_dataframe = False
- use_dask = False
- elif mode == "pandas":
- load_dataframe = True
- use_dask = False
- else:
- load_dataframe = True
- use_dask = True
- return FileManager(use_dask=use_dask).load_pixels(
- uri, load_dataframe=load_dataframe
- )
+ return FileManager(mode).load_pixels(uri)
def get_global_parameters(self) -> PixelParameters:
"""Returns global parameters of pixels
@@ -125,15 +123,6 @@ def get_global_parameters(self) -> PixelParameters:
same_chromosome=self._same_chromosome,
)
- @property
- def path(self) -> str:
- """Returns path of pixels
-
- Returns:
- str: The path of the pixels.
- """
- return self._path
-
@property
def data(self) -> DataFrame:
"""Returns pixels as dataframe
@@ -200,6 +189,10 @@ def same_chromosome(self) -> bool:
"""
return self._same_chromosome
+ def get_schema(self) -> GenomicDataSchema:
+ """Returns the schema of the underlying data"""
+ return self._schema
+
class GenomicBinner:
"""Bins higher order contacts into genomic bins of fixed size.
@@ -208,43 +201,45 @@ class GenomicBinner:
Args:
bin_size (int): The size of the genomic bins.
-
"""
def __init__(self, bin_size: int) -> None:
self._bin_size = bin_size
- self._contact_order = None
- def _get_assigned_bin_output_structure(self):
- columns = [f"chrom_{index}" for index in range(1, self._contact_order + 1)] + [
- f"start_{index}" for index in range(1, self._contact_order + 1)
+ def _get_assigned_bin_output_structure(self, contact_order: int):
+ columns = [f"chrom_{index}" for index in range(1, contact_order + 1)] + [
+ f"start_{index}" for index in range(1, contact_order + 1)
]
return pd.DataFrame(columns=columns).astype(int)
- def _assign_bins(self, data_frame: pd.DataFrame) -> pd.DataFrame:
+ def _assign_bins(
+ self, data_frame: pd.DataFrame, contact_order: int
+ ) -> pd.DataFrame:
# capture empty dataframe
if data_frame.empty:
- return self._get_assigned_bin_output_structure()
+ return self._get_assigned_bin_output_structure(contact_order)
return data_frame.assign(
**{
f"start_{index}": (data_frame[f"pos_{index}"] // self._bin_size)
* self._bin_size
- for index in range(1, self._contact_order + 1)
+ for index in range(1, contact_order + 1)
}
).filter(regex="(chrom|start)")
- def _assign_midpoints(self, contacts: dd.DataFrame) -> dd.DataFrame:
+ def _assign_midpoints(
+ self, contacts: dd.DataFrame, contact_order: int
+ ) -> dd.DataFrame:
"""Collapses start-end to a middle position"""
return contacts.assign(
**{
f"pos_{index}": (contacts[f"start_{index}"] + contacts[f"end_{index}"])
// 2
- for index in range(1, self._contact_order + 1)
+ for index in range(1, contact_order + 1)
}
).drop(
[
c
- for index in range(1, self._contact_order + 1)
+ for index in range(1, contact_order + 1)
for c in [f"start_{index}", f"end_{index}"]
],
axis=1,
@@ -261,19 +256,22 @@ def bin_contacts(self, contacts: Contacts, same_chromosome: bool = True) -> Pixe
Pixels: The binned genomic pixels.
"""
- self._contact_order = contacts.number_fragments
- contacts_w_midpoints = self._assign_midpoints(contacts.data)
- if contacts.is_dask:
+ contact_order = contacts.number_fragments
+ contacts_w_midpoints = self._assign_midpoints(contacts.data, contact_order)
+ if contacts.data_mode == DataMode.DASK:
contact_bins = contacts_w_midpoints.map_partitions(
- self._assign_bins, meta=self._get_assigned_bin_output_structure()
+ partial(self._assign_bins, contact_order=contact_order),
+ meta=self._get_assigned_bin_output_structure(contact_order),
)
+ elif contacts.data_mode == DataMode.PANDAS:
+ contact_bins = self._assign_bins(contacts_w_midpoints, contact_order)
else:
- contact_bins = self._assign_bins(contacts_w_midpoints)
+ raise ValueError(f"Data mode: {contacts.data_mode} not supported!")
pixels = (
contact_bins.groupby(
[
c
- for index in range(1, self._contact_order + 1)
+ for index in range(1, contact_order + 1)
for c in [f"chrom_{index}", f"start_{index}"]
],
observed=True,
@@ -290,26 +288,25 @@ def bin_contacts(self, contacts: Contacts, same_chromosome: bool = True) -> Pixe
& (pixels.chrom_2.astype(str) == pixels.chrom_3.astype(str))
]
.drop(
- [f"chrom_{index}" for index in range(2, self._contact_order + 1)],
+ [f"chrom_{index}" for index in range(2, contact_order + 1)],
axis=1,
)
.rename(columns={"chrom_1": "chrom"})
)
# sort pixels
pixels_sorted = pixels.sort_values(
- ["chrom"]
- + [f"start_{index}" for index in range(1, self._contact_order + 1)]
+ ["chrom"] + [f"start_{index}" for index in range(1, contact_order + 1)]
).reset_index(drop=True)
else:
pixels_sorted = pixels.sort_values(
- [f"chrom_{index}" for index in range(1, self._contact_order + 1)]
- + [f"start_{index}" for index in range(1, self._contact_order + 1)]
+ [f"chrom_{index}" for index in range(1, contact_order + 1)]
+ + [f"start_{index}" for index in range(1, contact_order + 1)]
).reset_index(drop=True)
# construct pixels and return
return Pixels(
pixels_sorted,
same_chromosome=same_chromosome,
- number_fragments=self._contact_order,
+ number_fragments=contact_order,
binsize=self._bin_size,
binary_labels_equal=contacts.binary_labels_equal,
symmetry_flipped=contacts.symmetry_flipped,
diff --git a/spoc/query_engine.py b/spoc/query_engine.py
new file mode 100644
index 0000000..6069897
--- /dev/null
+++ b/spoc/query_engine.py
@@ -0,0 +1,622 @@
+"""This file contains the classes making up the query engine."""
+from enum import Enum
+from itertools import product
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Protocol
+from typing import Tuple
+from typing import TypeVar
+from typing import Union
+
+import dask.dataframe as dd
+import duckdb
+import numpy as np
+import pandas as pd
+from pydantic import BaseModel
+
+from spoc.io import DUCKDB_CONNECTION
+from spoc.models.dataframe_models import GenomicDataSchema
+from spoc.models.dataframe_models import QueryStepDataSchema
+from spoc.models.dataframe_models import RegionSchema
+
+
+T = TypeVar("T")
+
+
+def convert_string_to_enum(enum_class: Callable[[str], T], string: str) -> T:
+ """Converts a string to an enum value"""
+ try:
+ return enum_class(string.upper())
+ except ValueError as exc:
+ raise ValueError(f"Invalid value for {enum_class.__name__}: {string}") from exc
+
+
+class GenomicData(Protocol):
+ """Protocol for genomic data
+ to be used in the query engine"""
+
+ @property
+ def data(self) -> Union[pd.DataFrame, duckdb.DuckDBPyRelation, dd.DataFrame]:
+ """Return the data in the object"""
+
+ def get_schema(self) -> GenomicDataSchema:
+ """Return the schema of the underlying data"""
+
+
+class QueryStep(Protocol):
+ """Protocol for query steps"""
+
+ def validate(self, data_schema: GenomicDataSchema) -> None:
+ """Validate the query step against the data schema"""
+
+ def __call__(self, *args: Any, **kwds: Any) -> "QueryPlan":
+ """Apply the query step to the data"""
+
+
+# TODO: think about allowing anchor composition
+class Anchor(BaseModel):
+ """Represents an anchor.
+
+ Attributes:
+ mode (str): The mode of the anchor. (Can be "ANY" or "ALL")
+ anchors (Optional[List[int]]): The list of anchor values (optional).
+ """
+
+ mode: str
+ anchors: Optional[List[int]] = None
+
+ def __repr__(self) -> str:
+ return f"Anchor(mode={self.mode}, anchors={self.anchors})"
+
+ def __str__(self) -> str:
+ return self.__repr__()
+
+
+class Overlap:
+ """
+ This class represents an overlap calculation used for contact and pixel selection.
+ It provides methods to validate the filter against a data schema,
+ convert data to a duckdb relation, construct a filter string,
+ and apply the filter to the data.
+ """
+
+ def __init__(
+ self,
+ regions: pd.DataFrame,
+ anchor_mode: Union[Anchor, Tuple[str, List[int]]],
+ half_window_size: Optional[int] = None,
+ ) -> None:
+ """
+ Initialize the Overlap object.
+
+ Args:
+ regions (pd.DataFrame): A DataFrame containing the regions data.
+ anchor_mode (Union[Anchor,Tuple[str,List[int]]]): The anchor mode to be used.
+ half_window_size (Optional[int]): The window size the regions should be expanded to. Defaults to None and is inferred from the data.
+
+ Returns:
+ None
+ """
+ # add ids to regions if they don't exist
+ if "id" not in regions.columns:
+ regions["id"] = range(len(regions))
+ if half_window_size is not None:
+ expanded_regions = regions.copy()
+ # create midpoint
+ expanded_regions["midpoint"] = (
+ expanded_regions["start"] + expanded_regions["end"]
+ ) // 2
+ # expand regions
+ expanded_regions["start"] = expanded_regions["midpoint"] - half_window_size
+ expanded_regions["end"] = expanded_regions["midpoint"] + half_window_size
+ # drop midpoint
+ expanded_regions = expanded_regions.drop(columns=["midpoint"])
+ self._regions = RegionSchema.validate(
+ expanded_regions.add_prefix("region_")
+ )
+ self._half_window_size = half_window_size
+ else:
+ self._regions = RegionSchema.validate(regions.add_prefix("region_"))
+ # infer window size -> variable regions will have largest possible window size
+ self._half_window_size = int(
+ (self._regions["region_end"] - self._regions["region_start"]).max() // 2
+ )
+ if isinstance(anchor_mode, tuple):
+ self._anchor_mode = Anchor(mode=anchor_mode[0], anchors=anchor_mode[1])
+ else:
+ self._anchor_mode = anchor_mode
+
+ def validate(self, data_schema: GenomicDataSchema) -> None:
+ """Validate the filter against the data schema"""
+ # check whether an anchor is specified that is not in the data
+ if self._anchor_mode.anchors is not None:
+ if not all(
+ anchor in data_schema.get_position_fields().keys()
+ for anchor in self._anchor_mode.anchors
+ ):
+ raise ValueError(
+ "An anchor is specified that is not in the data schema."
+ )
+
+ def _convert_to_duckdb(
+ self,
+ data: Union[pd.DataFrame, dd.DataFrame],
+ ) -> duckdb.DuckDBPyRelation:
+ """
+ Converts the data to a duckdb relation.
+
+ Parameters:
+            data (Union[pd.DataFrame, dd.DataFrame]): The input data to be converted (dask dataframes are computed first).
+
+ Returns:
+ duckdb.DuckDBPyRelation: The converted duckdb relation.
+ """
+ if isinstance(data, dd.DataFrame):
+ data = data.compute()
+ return duckdb.from_df(data, connection=DUCKDB_CONNECTION)
+
+ def _contstruct_filter(self, position_fields: Dict[int, List[str]]) -> str:
+ """Constructs the filter string.
+
+ Args:
+            position_fields (Dict[int, List[str]]): Mapping of position index to its [chrom, start, end] column names.
+
+ Returns:
+ str: The constructed filter string.
+
+ Raises:
+            ValueError: If a position's field list does not contain exactly three entries (chrom, start, end).
+ """
+ query_strings = []
+ join_string = " or " if self._anchor_mode.mode == "ANY" else " and "
+ # subset on anchor regions
+ if self._anchor_mode.anchors is not None:
+ subset_positions = [
+ position_fields[anchor] for anchor in self._anchor_mode.anchors
+ ]
+ else:
+ subset_positions = list(position_fields.values())
+ for fields in subset_positions:
+ chrom, start, end = fields
+ output_string = f"""(data.{chrom} = regions.region_chrom and
+ (
+ data.{start} between regions.region_start and regions.region_end or
+ data.{end} between regions.region_start and regions.region_end or
+ regions.region_start between data.{start} and data.{end}
+ )
+ )"""
+ query_strings.append(output_string)
+ return join_string.join(query_strings)
+
+ def _get_transformed_schema(
+ self,
+ data_frame: duckdb.DuckDBPyRelation,
+ input_schema: GenomicDataSchema,
+ position_fields: Dict[int, List[str]],
+ ) -> GenomicDataSchema:
+ """Returns the schema of the transformed data."""
+ # construct schema
+ return QueryStepDataSchema(
+ columns=data_frame.columns,
+ position_fields=position_fields,
+ contact_order=input_schema.get_contact_order(),
+ binsize=input_schema.get_binsize(),
+ region_number=len(self._regions),
+ half_window_size=self._half_window_size,
+ )
+
+ def _add_end_position(
+ self,
+ data_frame: duckdb.DuckDBPyRelation,
+ bin_size: Optional[int],
+ position_fields: Dict[int, List[str]],
+ ) -> duckdb.DuckDBPyRelation:
+ """Adds an end position column to the dataframe"""
+ position_columns = {j for i in position_fields.values() for j in i}
+ non_position_columns = [
+ column for column in data_frame.columns if column not in position_columns
+ ]
+ new_position_clause = [
+ f"data.chrom as chrom_{position}, data.start_{position} as start_{position}, data.start_{position} + {bin_size} as end_{position}"
+ for position in position_fields.keys()
+ ]
+ # add end position
+ return data_frame.set_alias("data").project(
+ ",".join(new_position_clause + non_position_columns)
+ )
+
+ def __repr__(self) -> str:
+ return f"Overlap(anchor_mode={self._anchor_mode})"
+
+ def __call__(self, genomic_data: GenomicData) -> GenomicData:
+ """Apply the filter to the data"""
+ # get input schema
+ input_schema = genomic_data.get_schema()
+ # bring input to duckdb dataframe
+ if isinstance(genomic_data.data, duckdb.DuckDBPyRelation):
+ genomic_df = genomic_data.data
+ else:
+ genomic_df = self._convert_to_duckdb(genomic_data.data)
+ regions = self._convert_to_duckdb(self._regions)
+ # get position columns and construct filter
+ position_fields = input_schema.get_position_fields()
+ # add end position if not present
+ if len(position_fields[1]) == 2:
+ genomic_df = self._add_end_position(
+ genomic_df, input_schema.get_binsize(), position_fields
+ )
+ position_fields = {
+ position: [f"chrom_{position}", f"start_{position}", f"end_{position}"]
+ for position in position_fields.keys()
+ }
+ # construct query
+ snipped_df = genomic_df.set_alias("data").join(
+ regions.set_alias("regions"), self._contstruct_filter(position_fields)
+ )
+ return QueryPlan(
+ snipped_df,
+ self._get_transformed_schema(snipped_df, input_schema, position_fields),
+ )
+
+
+class AggregationFunction(Enum):
+ """Enum for aggregation functions.
+ Options are:
+ SUM: Sum of values.
+ AVG_WITH_EMPTY: Average of values, empty values are counted as 0.
+ AVG: Average of values, empty values are not counted.
+ COUNT: Number of values.
+ """
+
+ SUM: str = "SUM"
+ AVG_WITH_EMPTY: str = "AVG_WITH_EMPTY"
+ AVG: str = "AVG"
+ COUNT: str = "COUNT"
+
+
+class DistanceAggregation:
+ """Aggregation based on distances from a region. Uses all available distances."""
+
+ def __init__(
+ self,
+ value_column: str,
+ function: Union[AggregationFunction, str] = AggregationFunction.AVG,
+ densify_output: bool = True,
+ position_list: Optional[List[int]] = None,
+ ) -> None:
+ """Initialize the aggregation.
+
+ Args:
+ value_column (str): The name of the column to be aggregated.
+ function (Union[AggregationFunction,str]): The aggregation function to be applied. Defaults to AggregationFunction.AVG.
+ densify_output (bool, optional): Whether to densify the output. Defaults to True.
+ This requires a binsize value to be set in the data schema.
+ position_list (Optional[List[int]]): The list of positions to use for aggregations, starting with 1. Defaults to using all positions.
+ """
+ if isinstance(function, str):
+ parsed_function = convert_string_to_enum(AggregationFunction, function)
+ else:
+ parsed_function = function
+ self._function = parsed_function
+ self._value_column = value_column
+ self._densify_output = densify_output
+ self._position_list = position_list
+
+ def validate(self, data_schema: GenomicDataSchema) -> None:
+ """Validate the aggregation against the data schema"""
+        # check that at least one distance field is present
+ if "distance_1" not in data_schema.get_schema().columns:
+ raise ValueError("No distance fields in data schema.")
+ # check that all position fields are present
+ if self._position_list is not None:
+ for position in self._position_list:
+ if position not in data_schema.get_position_fields():
+ raise ValueError(f"Position {position} not in data schema.")
+ # check that value column is present
+ if self._value_column not in data_schema.get_schema().columns:
+ raise ValueError("Value column not in data schema.")
+ # check for binsize -> only pixels have that
+ if data_schema.get_binsize() is None:
+ raise ValueError("No binsize specified in data schema.")
+ # check for window size
+ if data_schema.get_half_window_size() is None:
+ raise ValueError("No window size specified in data schema.")
+
+ def _get_transformed_schema(
+ self,
+ data_frame: duckdb.DuckDBPyRelation,
+ input_schema: GenomicDataSchema,
+ position_fields: Dict[int, List[str]],
+ ) -> GenomicDataSchema:
+ """Returns the schema of the transformed data."""
+ # construct schema
+ return QueryStepDataSchema(
+ columns=data_frame.columns,
+ position_fields=position_fields,
+ contact_order=len(position_fields),
+ binsize=input_schema.get_binsize(),
+ )
+
+ def _aggregate_distances(
+ self,
+ data_frame: duckdb.DuckDBPyRelation,
+ input_schema: GenomicDataSchema,
+ position_fields: Dict[int, List[str]],
+ ) -> duckdb.DuckDBPyRelation:
+ """Aggregates the distances."""
+ # get distance columns
+ distance_columns = [
+ f"distance_{position}" for position in position_fields.keys()
+ ]
+ # construct aggregation
+ if self._function == AggregationFunction.COUNT:
+ aggregation_string = (
+ f"COUNT(*) as {self._value_column}_{self._function.name.lower()}"
+ )
+ elif self._function == AggregationFunction.AVG_WITH_EMPTY:
+ # For average, we need to sum up the values and divide by the number of regions
+ aggregation_string = f"SUM({self._value_column})::float/{input_schema.get_region_number()} as {self._value_column}_{self._function.name.lower()}"
+ else:
+ aggregation_string = f"{self._function.name}({self._value_column}) as {self._value_column}_{self._function.name.lower()}"
+ data_frame = (
+ data_frame.set_alias("data")
+ .aggregate(
+ ",".join(distance_columns + [aggregation_string]),
+ )
+ .order(",".join(distance_columns))
+ )
+ return data_frame
+
+ def _create_empty_dense_output(
+ self,
+ input_schema: GenomicDataSchema,
+ position_fields: Dict[int, List[str]],
+ ) -> duckdb.DuckDBPyRelation:
+ """Create dense value columns for all distances."""
+ binsize: Optional[int] = input_schema.get_binsize()
+ if binsize is None:
+ raise ValueError("No binsize specified in data schema.")
+ int_binsize: int = binsize
+ windowsize: Optional[int] = input_schema.get_half_window_size()
+ if windowsize is None:
+ raise ValueError("No window size specified in data schema.")
+ int_windowsize: int = windowsize
+ # create combinations of distances
+ distance_combinations = pd.DataFrame(
+ product(
+ np.arange(
+ -(np.floor(int_windowsize / int_binsize) * int_binsize),
+ (np.floor(int_windowsize / int_binsize) * int_binsize) + 1,
+ int_binsize,
+ ),
+ repeat=len(position_fields.keys()),
+ ),
+ columns=[f"distance_{i}" for i in position_fields.keys()],
+ )
+ # fill value
+ if self._function in (
+ AggregationFunction.COUNT,
+ AggregationFunction.SUM,
+ AggregationFunction.AVG_WITH_EMPTY,
+ ):
+ distance_combinations["fill_value"] = 0
+ else:
+ distance_combinations["fill_value"] = np.nan
+ return duckdb.from_df(distance_combinations, connection=DUCKDB_CONNECTION)
+
+ def _fill_empty_output(
+ self,
+ data_frame: duckdb.DuckDBPyRelation,
+ empty_dense_output: duckdb.DuckDBPyRelation,
+ position_fields: Dict[int, List[str]],
+ ) -> duckdb.DuckDBPyRelation:
+ """Fill empty output with values from dense output."""
+ # get distance columns
+ distance_columns = [f"distance_{i}" for i in position_fields.keys()]
+ # construct join and coalesce output
+ data_frame = (
+ data_frame.set_alias("data")
+ .join(
+ empty_dense_output.set_alias("empty_dense_output"),
+ ",".join(distance_columns),
+ how="right",
+ )
+ .project(
+ ",".join(distance_columns)
+ + f", COALESCE(data.{self._value_column}_{self._function.name.lower()}, empty_dense_output.fill_value) as {self._value_column}"
+ )
+ .set_alias("filled")
+ .order(",".join([f"filled.{col}" for col in distance_columns]))
+ )
+ return data_frame
+
+ def __call__(self, genomic_data: GenomicData) -> GenomicData:
+ """Apply the aggregation to the data"""
+ # get input schema
+ input_schema = genomic_data.get_schema()
+ # bring input to duckdb dataframe
+ if isinstance(genomic_data.data, duckdb.DuckDBPyRelation):
+ genomic_df = genomic_data.data
+ else:
+ genomic_df = duckdb.from_df(genomic_data.data, connection=DUCKDB_CONNECTION)
+ # get position columns
+ position_fields = input_schema.get_position_fields()
+ if self._position_list is not None:
+ position_fields = {
+ position: position_fields[position] for position in self._position_list
+ }
+ # construct transformation
+ aggregated_data = self._aggregate_distances(
+ genomic_df, input_schema, position_fields
+ )
+ if self._densify_output:
+ empty_dense_output = self._create_empty_dense_output(
+ input_schema, position_fields
+ )
+ aggregated_data = self._fill_empty_output(
+ aggregated_data, empty_dense_output, position_fields
+ )
+ return QueryPlan(
+ aggregated_data,
+ self._get_transformed_schema(
+ aggregated_data, input_schema, position_fields
+ ),
+ )
+
+
+class DistanceMode(Enum):
+ """Enum for distance modes."""
+
+ LEFT: str = "LEFT"
+ RIGHT: str = "RIGHT"
+ BOTH: str = "BOTH"
+ MIDPOINT: str = "MIDPOINT"
+
+
+class DistanceTransformation:
+ """Adds distance columns for each position field relative
+ to required region_columns."""
+
+ def __init__(
+ self, distance_mode: Union[DistanceMode, str] = DistanceMode.LEFT
+ ) -> None:
+ """Initialize the transformation.
+
+ Args:
+            distance_mode (Union[DistanceMode,str]): The distance mode to be used. Defaults to DistanceMode.LEFT.
+ Specifies how the distance is calculated relative to the region midpoint.
+ Note that the distance is always calculated relative to the midpoint of the region.
+                If a binsize is specified in the data schema, this needs to be set to LEFT.
+ """
+ if isinstance(distance_mode, str):
+ distance_mode = convert_string_to_enum(DistanceMode, distance_mode)
+ self._distance_mode = distance_mode
+
+ def validate(self, data_schema: GenomicDataSchema) -> None:
+ """Validate the transformation against the data schema"""
+ # check that there are position fields and region columns
+ if not data_schema.get_position_fields():
+ raise ValueError("No position fields in data schema.")
+ schema_columns = data_schema.get_schema().columns
+ required_columns = ["region_chrom", "region_start", "region_end"]
+ if not all(column in schema_columns for column in required_columns):
+ raise ValueError("No region columns in data schema.")
+ if (
+ self._distance_mode != DistanceMode.LEFT
+ and data_schema.get_binsize() is not None
+ ):
+ raise ValueError(
+ "Binsize specified in data schema, but distance mode is not set to LEFT."
+ )
+
+ def _create_transform_columns(
+ self, genomic_df: duckdb.DuckDBPyRelation, input_schema: GenomicDataSchema
+ ) -> duckdb.DuckDBPyRelation:
+ """Creates the transform columns for the given position fields"""
+ # position fields
+ position_fields = input_schema.get_position_fields()
+ # get existing columns
+ transform_strings = [f"data.{column}" for column in genomic_df.columns]
+ # check whether binsize is specified
+ if input_schema.get_binsize() is not None:
+ binsize = input_schema.get_binsize()
+ else:
+ binsize = 1
+ # create transform columns
+ for position_field, fields in position_fields.items():
+ _, start, end = fields
+ if self._distance_mode == DistanceMode.MIDPOINT:
+ output_string = f"""(FLOOR((data.{start} + data.{end})/2) - FLOOR((data.region_start + data.region_end)/2))
+ as distance_{position_field}"""
+ if self._distance_mode == DistanceMode.LEFT:
+ output_string = f"""data.{start} - FLOOR((FLOOR(data.region_start/{binsize}) * {binsize}
+ + FLOOR(data.region_end/{binsize}) * {binsize})/2) as distance_{position_field}"""
+ if self._distance_mode == DistanceMode.RIGHT:
+ output_string = f"""data.{end} - FLOOR((data.region_start + data.region_end)/2) as distance_{position_field}"""
+ if self._distance_mode == DistanceMode.BOTH:
+ output_string = f"""data.{start} - FLOOR((data.region_start + data.region_end)/2) as start_distance_{position_field},
+ data.{end} - FLOOR((data.region_start + data.region_end)/2) as end_distance_{position_field}"""
+ transform_strings.append(output_string)
+ return ",".join(transform_strings)
+
+ def _get_transformed_schema(
+ self, data_frame: duckdb.DuckDBPyRelation, input_schema: GenomicDataSchema
+ ) -> GenomicDataSchema:
+ """Returns the schema of the transformed data."""
+ # construct schema
+ return QueryStepDataSchema(
+ columns=data_frame.columns,
+ position_fields=input_schema.get_position_fields(),
+ contact_order=input_schema.get_contact_order(),
+ binsize=input_schema.get_binsize(),
+ region_number=input_schema.get_region_number(),
+ half_window_size=input_schema.get_half_window_size(),
+ )
+
+ def __call__(self, genomic_data: GenomicData) -> GenomicData:
+ """Apply the transformation to the data"""
+ # get input schema
+ input_schema = genomic_data.get_schema()
+ # bring input to duckdb dataframe
+ if isinstance(genomic_data.data, duckdb.DuckDBPyRelation):
+ genomic_df = genomic_data.data
+ else:
+ genomic_df = duckdb.from_df(genomic_data.data, connection=DUCKDB_CONNECTION)
+ # construct transformation
+ transformed_df = genomic_df.set_alias("data").project(
+ self._create_transform_columns(genomic_df, input_schema)
+ )
+ return QueryPlan(
+ transformed_df, self._get_transformed_schema(transformed_df, input_schema)
+ )
+
+
+class QueryPlan:
+ """Result of a query"""
+
+ def __init__(
+ self,
+ data: Union[pd.DataFrame, duckdb.DuckDBPyRelation],
+ schema: GenomicDataSchema,
+ ) -> None:
+ self._data = data
+ self._schema = schema
+
+ @property
+ def data(self) -> Union[pd.DataFrame, duckdb.DuckDBPyRelation]:
+ """Returns the result as a dataframe object, either in memory or as a relation object"""
+ return self._data
+
+ def compute(self) -> pd.DataFrame:
+ """Loads the result into memory"""
+ if isinstance(self._data, duckdb.DuckDBPyRelation):
+ return self._data.to_df()
+ return self._data
+
+ def get_schema(self) -> GenomicDataSchema:
+ """Returns the schema of the result"""
+ return self._schema
+
+
+# pylint: disable=too-few-public-methods
+# this is a wrapper with one task, so it only has one method
+class Query:
+ """Basic query engine that runs a query plan on the data"""
+
+ def __init__(self, query_steps: List[QueryStep]) -> None:
+ self._query_steps = query_steps
+
+ def build(self, input_data: GenomicData) -> QueryPlan:
+ """Runs the query on the data and returns the result"""
+ # instantiate query result
+ query_plan = QueryPlan(input_data.data, input_data.get_schema())
+ # run query
+ for step in self._query_steps:
+ # validate schema
+ step.validate(query_plan.get_schema())
+ # apply step
+ query_plan = step(query_plan)
+ return query_plan
diff --git a/tests/fixtures/symmetry.py b/tests/conftest.py
similarity index 98%
rename from tests/fixtures/symmetry.py
rename to tests/conftest.py
index 02152f5..4bdc06a 100644
--- a/tests/fixtures/symmetry.py
+++ b/tests/conftest.py
@@ -1,8 +1,10 @@
"""Fixtures for testing symmetry.py"""
# pylint: disable=redefined-outer-name
-import pytest
-import pandas as pd
+from __future__ import annotations
+
import dask.dataframe as dd
+import pandas as pd
+import pytest
@pytest.fixture
@@ -23,7 +25,7 @@ def unlabelled_contacts_2d():
"mapping_quality_2": [10, 10, 10],
"align_score_2": [10, 10, 10],
"align_base_qscore_2": [10, 10, 10],
- }
+ },
)
@@ -56,7 +58,7 @@ def unlabelled_contacts_3d():
"mapping_quality_3": [10, 10, 5],
"align_score_3": [10, 10, 5],
"align_base_qscore_3": [10, 10, 5],
- }
+ },
)
@@ -83,7 +85,7 @@ def unlabelled_contacts_2d_flipped():
"mapping_quality_2": [10, 10, 15],
"align_score_2": [10, 10, 15],
"align_base_qscore_2": [10, 10, 15],
- }
+ },
)
@@ -111,7 +113,7 @@ def unlabelled_contacts_3d_flipped():
"mapping_quality_3": [10, 10, 15],
"align_score_3": [10, 10, 15],
"align_base_qscore_3": [10, 10, 15],
- }
+ },
)
@@ -135,7 +137,7 @@ def labelled_binary_contacts_2d():
"align_score_2": [10, 10, 15],
"align_base_qscore_2": [10, 10, 15],
"metadata_2": ["B", "A", "A"],
- }
+ },
)
@@ -159,7 +161,7 @@ def labelled_binary_contacts_2d_sorted():
"align_score_2": [10, 10, 15],
"align_base_qscore_2": [10, 10, 15],
"metadata_2": ["B", "B", "A"],
- }
+ },
)
@@ -190,7 +192,7 @@ def labelled_binary_contacts_3d():
"align_score_3": [10, 10, 15],
"align_base_qscore_3": [10, 10, 15],
"metadata_3": ["B", "A", "A"],
- }
+ },
)
@@ -221,7 +223,7 @@ def labelled_binary_contacts_3d_sorted():
"align_score_3": [10, 10, 15],
"align_base_qscore_3": [10, 10, 15],
"metadata_3": ["B", "B", "A"],
- }
+ },
)
@@ -245,7 +247,7 @@ def binary_contacts_not_equated_2d():
"align_score_2": [10, 10, 15],
"align_base_qscore_2": [10, 10, 15],
"metadata_2": ["B", "B", "A"],
- }
+ },
)
@@ -276,7 +278,7 @@ def binary_contacts_not_equated_3d():
"align_score_3": [10, 10, 15],
"align_base_qscore_3": [10, 10, 15],
"metadata_3": ["B", "B", "B"],
- }
+ },
)
@@ -314,7 +316,7 @@ def binary_contacts_not_equated_4d():
"align_score_4": [10, 10, 15],
"align_base_qscore_4": [10, 10, 15],
"metadata_4": ["B", "B", "B"],
- }
+ },
)
@@ -338,7 +340,7 @@ def binary_contacts_equated_2d():
"align_score_2": [10, 10, 15],
"align_base_qscore_2": [10, 10, 15],
"metadata_2": ["A", "B", "A"],
- }
+ },
)
@@ -369,7 +371,7 @@ def binary_contacts_equated_3d():
"align_score_3": [10, 10, 15],
"align_base_qscore_3": [10, 10, 15],
"metadata_3": ["B", "B", "A"],
- }
+ },
)
@@ -407,7 +409,7 @@ def binary_contacts_equated_4d():
"align_score_4": [10, 10, 15],
"align_base_qscore_4": [10, 10, 15],
"metadata_4": ["B", "B", "A"],
- }
+ },
)
@@ -431,7 +433,7 @@ def labelled_binary_contacts_2d_unflipped():
"align_score_2": [10, 10, 15],
"align_base_qscore_2": [10, 10, 15],
"metadata_2": ["B", "B", "A"],
- }
+ },
)
@@ -462,7 +464,7 @@ def labelled_binary_contacts_3d_unflipped():
"align_score_3": [10, 10, 15, 14],
"align_base_qscore_3": [10, 10, 15, 14],
"metadata_3": ["B", "B", "B", "A"],
- }
+ },
)
@@ -493,7 +495,7 @@ def labelled_binary_contacts_3d_unflipped_example2():
"align_score_3": [10, 10, 14],
"align_base_qscore_3": [10, 10, 14],
"metadata_3": ["B", "B", "A"],
- }
+ },
)
@@ -517,7 +519,7 @@ def labelled_binary_contacts_2d_flipped():
"align_score_2": [10, 10, 10],
"align_base_qscore_2": [10, 10, 10],
"metadata_2": ["B", "B", "A"],
- }
+ },
)
@@ -548,7 +550,7 @@ def labelled_binary_contacts_3d_flipped():
"align_score_3": [10, 10, 15, 20],
"align_base_qscore_3": [10, 10, 15, 20],
"metadata_3": ["B", "B", "B", "A"],
- }
+ },
)
@@ -579,7 +581,7 @@ def labelled_binary_contacts_3d_flipped_example2():
"align_score_3": [10, 10, 14],
"align_base_qscore_3": [10, 10, 14],
"metadata_3": ["B", "B", "A"],
- }
+ },
)
@@ -601,7 +603,7 @@ def unlabelled_contacts_diff_chrom_2d():
"mapping_quality_2": [10, 10, 10],
"align_score_2": [10, 10, 10],
"align_base_qscore_2": [10, 10, 10],
- }
+ },
)
@@ -629,7 +631,7 @@ def unlabelled_contacts_diff_chrom_3d():
"mapping_quality_3": [10, 10, 5],
"align_score_3": [10, 10, 5],
"align_base_qscore_3": [10, 10, 5],
- }
+ },
)
@@ -663,7 +665,7 @@ def unlabelled_contacts_diff_chrom_4d():
"mapping_quality_4": [10, 10, 10],
"align_score_4": [10, 10, 10],
"align_base_qscore_4": [10, 10, 10],
- }
+ },
)
@@ -697,7 +699,7 @@ def unlabelled_contacts_diff_chrom_4d_flipped():
"mapping_quality_4": [10, 10, 10],
"align_score_4": [10, 10, 10],
"align_base_qscore_4": [10, 10, 10],
- }
+ },
)
@@ -725,7 +727,7 @@ def unlabelled_contacts_diff_chrom_3d_flipped():
"mapping_quality_3": [10, 10, 15],
"align_score_3": [10, 10, 15],
"align_base_qscore_3": [10, 10, 15],
- }
+ },
)
@@ -747,7 +749,7 @@ def unlabelled_contacts_diff_chrom_2d_flipped():
"mapping_quality_2": [10, 10, 15],
"align_score_2": [10, 10, 15],
"align_base_qscore_2": [10, 10, 15],
- }
+ },
)
@@ -771,7 +773,7 @@ def labelled_binary_contacts_diff_chrom_2d():
"align_score_2": [10, 10, 15],
"align_base_qscore_2": [10, 10, 15],
"metadata_2": ["B", "B", "A"],
- }
+ },
)
@@ -795,7 +797,7 @@ def labelled_binary_contacts_diff_chrom_2d_flipped():
"align_score_2": [10, 10, 15],
"align_base_qscore_2": [10, 10, 15],
"metadata_2": ["B", "B", "A"],
- }
+ },
)
@@ -826,7 +828,7 @@ def labelled_binary_contacts_diff_chrom_3d():
"align_score_3": [10, 10, 15, 14],
"align_base_qscore_3": [10, 10, 15, 14],
"metadata_3": ["B", "B", "B", "A"],
- }
+ },
)
@@ -857,5 +859,5 @@ def labelled_binary_contacts_diff_chrom_3d_flipped():
"align_score_3": [10, 10, 15, 20],
"align_base_qscore_3": [10, 10, 15, 20],
"metadata_3": ["B", "B", "B", "A"],
- }
+ },
)
diff --git a/tests/contacts_tests/test_contacts.py b/tests/contacts_tests/test_contacts.py
index edb467c..effaaf1 100644
--- a/tests/contacts_tests/test_contacts.py
+++ b/tests/contacts_tests/test_contacts.py
@@ -1,20 +1,10 @@
"""Tests for the contacts module."""
-
# pylint: disable=redefined-outer-name
-import pytest
import pandas as pd
-import pandera as pa
-import numpy as np
-import dask.dataframe as dd
+import pytest
from spoc import contacts
-# pytlint: disable=unused-import
-from ..fixtures.symmetry import (
- unlabelled_contacts_2d,
- labelled_binary_contacts_2d_sorted,
-)
-
@pytest.fixture
def contact_manipulator():
diff --git a/tests/contacts_tests/test_symmetry.py b/tests/contacts_tests/test_symmetry.py
index cb7ebdf..0eea19f 100644
--- a/tests/contacts_tests/test_symmetry.py
+++ b/tests/contacts_tests/test_symmetry.py
@@ -1,45 +1,12 @@
"""Tests for dealing with symmetry flipping for labelled and unlabelled contacts."""
# pylint: disable=redefined-outer-name
# pylint: disable=unused-import
-
-import pytest
-import pandas as pd
-import pandera as pa
-import numpy as np
import dask.dataframe as dd
-from spoc.contacts import Contacts, ContactManipulator
-from ..fixtures.symmetry import (
- unlabelled_contacts_2d,
- unlabelled_contacts_2d_flipped,
- unlabelled_contacts_3d,
- unlabelled_contacts_3d_flipped,
- labelled_binary_contacts_2d,
- labelled_binary_contacts_2d_sorted,
- labelled_binary_contacts_3d,
- labelled_binary_contacts_3d_sorted,
- binary_contacts_not_equated_2d,
- binary_contacts_not_equated_3d,
- binary_contacts_not_equated_4d,
- binary_contacts_equated_2d,
- binary_contacts_equated_3d,
- binary_contacts_equated_4d,
- labelled_binary_contacts_2d_unflipped,
- labelled_binary_contacts_2d_flipped,
- labelled_binary_contacts_3d_unflipped,
- labelled_binary_contacts_3d_unflipped_example2,
- labelled_binary_contacts_3d_flipped,
- labelled_binary_contacts_3d_flipped_example2,
- unlabelled_contacts_diff_chrom_2d,
- unlabelled_contacts_diff_chrom_3d,
- unlabelled_contacts_diff_chrom_4d,
- unlabelled_contacts_diff_chrom_3d_flipped,
- unlabelled_contacts_diff_chrom_2d_flipped,
- unlabelled_contacts_diff_chrom_4d_flipped,
- labelled_binary_contacts_diff_chrom_2d,
- labelled_binary_contacts_diff_chrom_2d_flipped,
- labelled_binary_contacts_diff_chrom_3d,
- labelled_binary_contacts_diff_chrom_3d_flipped,
-)
+import pandas as pd
+import pytest
+
+from spoc.contacts import ContactManipulator
+from spoc.contacts import Contacts
@pytest.mark.parametrize(
diff --git a/tests/fragments_tests/test_fragments.py b/tests/fragments_tests/test_fragments.py
index 0cbbad6..4e35ce0 100644
--- a/tests/fragments_tests/test_fragments.py
+++ b/tests/fragments_tests/test_fragments.py
@@ -1,19 +1,13 @@
"""Tests for the fragments module."""
-
# pylint: disable=redefined-outer-name
-import pytest
-import pandas as pd
-import numpy as np
import dask.dataframe as dd
+import numpy as np
+import pandas as pd
+import pytest
+from spoc import contacts
from spoc import fragments
-# pytlint: disable=unused-import
-from ..fixtures.symmetry import (
- unlabelled_contacts_2d,
- labelled_binary_contacts_2d_sorted,
-)
-
@pytest.fixture
def triplet_expander():
diff --git a/tests/fragments_tests/test_labels.py b/tests/fragments_tests/test_labels.py
index 3a8a593..f2ca601 100644
--- a/tests/fragments_tests/test_labels.py
+++ b/tests/fragments_tests/test_labels.py
@@ -1,11 +1,10 @@
"""Tests for label functionality"""
# pylint: disable=redefined-outer-name
-
-import pytest
+import dask.dataframe as dd
+import numpy as np
import pandas as pd
import pandera as pa
-import numpy as np
-import dask.dataframe as dd
+import pytest
from spoc import fragments
diff --git a/tests/io_tests/test_io_contacts.py b/tests/io_tests/test_io_contacts.py
index 856b853..567f1ee 100644
--- a/tests/io_tests/test_io_contacts.py
+++ b/tests/io_tests/test_io_contacts.py
@@ -1,16 +1,18 @@
"""This file tests the io module for contacts"""
# pylint: disable=redefined-outer-name
-import tempfile
-import os
import json
+import os
import shutil
+import tempfile
from pathlib import Path
-import pytest
+
import dask.dataframe as dd
+import pytest
+
from spoc.contacts import Contacts
from spoc.io import FileManager
+from spoc.models.dataframe_models import DataMode
from spoc.models.file_parameter_models import ContactsParameters
-from ..fixtures.symmetry import unlabelled_contacts_2d, labelled_binary_contacts_2d
def _create_tmp_dir():
@@ -98,7 +100,7 @@ def test_read_contacts_as_pandas_df(example_contacts_w_metadata):
contacts_dir, expected_parameters, paths, dataframes = example_contacts_w_metadata
# read metadata
for path, expected, df in zip(paths, expected_parameters, dataframes):
- contacts = FileManager(use_dask=False).load_contacts(contacts_dir, expected)
+ contacts = FileManager().load_contacts(contacts_dir, expected)
assert contacts.get_global_parameters() == expected
assert contacts.data.equals(df)
@@ -108,7 +110,7 @@ def test_read_contacts_as_dask_df(example_contacts_w_metadata):
contacts_dir, expected_parameters, paths, dataframes = example_contacts_w_metadata
# read metadata
for path, expected, df in zip(paths, expected_parameters, dataframes):
- contacts = FileManager(use_dask=True).load_contacts(contacts_dir, expected)
+ contacts = FileManager(DataMode.DASK).load_contacts(contacts_dir, expected)
assert contacts.get_global_parameters() == expected
assert contacts.data.compute().equals(df)
@@ -306,7 +308,7 @@ def test_load_contacts_from_uri_fails_without_required_parameters(df, params, re
file_name = tmpdirname + "/" + "test.parquet"
FileManager().write_contacts(file_name, contacts)
# try loading without required parameters
- with pytest.raises(ValueError) as e:
+ with pytest.raises(ValueError):
Contacts.from_uri(file_name)
@@ -427,5 +429,5 @@ def test_load_contacts_from_uri_fails_with_ambiguous_specification(df, params, r
FileManager().write_contacts(file_name, contacts)
FileManager().write_contacts(file_name, contacts2)
# load contacts
- with pytest.raises(ValueError) as e:
+ with pytest.raises(ValueError):
Contacts.from_uri(file_name + "::" + uri)
diff --git a/tests/io_tests/test_io_pixels.py b/tests/io_tests/test_io_pixels.py
index dc4f09b..f4c622f 100644
--- a/tests/io_tests/test_io_pixels.py
+++ b/tests/io_tests/test_io_pixels.py
@@ -1,14 +1,17 @@
"""This file tests the io module for pixels"""
# pylint: disable=redefined-outer-name
-import tempfile
-import os
import json
+import os
import shutil
+import tempfile
from pathlib import Path
-import pytest
-import pandas as pd
+
import dask.dataframe as dd
+import pandas as pd
+import pytest
+
from spoc.io import FileManager
+from spoc.models.dataframe_models import DataMode
from spoc.models.file_parameter_models import PixelParameters
from spoc.pixels import Pixels
@@ -119,24 +122,12 @@ def test_read_pixels_metadata_json_fails_gracefully():
assert e.value == "Metadata file not found at bad_path/metadata.json"
-def test_read_pixels_as_path(example_pixels_w_metadata):
- """Test reading pixels metadata json file"""
- pixels_dir, expected_parameters, paths, _ = example_pixels_w_metadata
- # read metadata
- for path, expected in zip(paths, expected_parameters):
- pixels = FileManager().load_pixels(pixels_dir, expected, load_dataframe=False)
- assert pixels.path == path
- assert pixels.get_global_parameters() == expected
-
-
def test_read_pixels_as_pandas_df(example_pixels_w_metadata):
"""Test reading pixels metadata json file"""
pixels_dir, expected_parameters, paths, dataframes = example_pixels_w_metadata
# read metadata
for path, expected, df in zip(paths, expected_parameters, dataframes):
- pixels = FileManager(use_dask=False).load_pixels(
- pixels_dir, expected, load_dataframe=True
- )
+ pixels = FileManager(DataMode.PANDAS).load_pixels(pixels_dir, expected)
assert pixels.get_global_parameters() == expected
assert pixels.data.equals(df)
@@ -146,9 +137,7 @@ def test_read_pixels_as_dask_df(example_pixels_w_metadata):
pixels_dir, expected_parameters, paths, dataframes = example_pixels_w_metadata
# read metadata
for path, expected, df in zip(paths, expected_parameters, dataframes):
- pixels = FileManager(use_dask=True).load_pixels(
- pixels_dir, expected, load_dataframe=True
- )
+ pixels = FileManager(DataMode.DASK).load_pixels(pixels_dir, expected)
assert pixels.get_global_parameters() == expected
assert pixels.data.compute().equals(df)
@@ -282,7 +271,7 @@ def test_load_pixels_from_uri_fails_without_required_parameters(df, params, requ
file_name = tmpdirname + "/" + "test.parquet"
FileManager().write_pixels(file_name, pixels)
# try loading without required parameters
- with pytest.raises(ValueError) as e:
+ with pytest.raises(ValueError):
Pixels.from_uri(file_name)
@@ -418,5 +407,5 @@ def test_load_pixels_from_uri_fails_with_ambiguous_specification(df, params, req
FileManager().write_pixels(file_name, pixels)
FileManager().write_pixels(file_name, pixels2)
# load pixels
- with pytest.raises(ValueError) as e:
+ with pytest.raises(ValueError):
Pixels.from_uri(file_name + "::" + uri)
diff --git a/tests/fixtures/__init__.py b/tests/query_engine/__init__.py
similarity index 100%
rename from tests/fixtures/__init__.py
rename to tests/query_engine/__init__.py
diff --git a/tests/query_engine/conftest.py b/tests/query_engine/conftest.py
new file mode 100644
index 0000000..e75fdf7
--- /dev/null
+++ b/tests/query_engine/conftest.py
@@ -0,0 +1,149 @@
+"""Shared fixtures for query engine tests."""
+from __future__ import annotations
+
+import pandas as pd
+import pytest
+
+from spoc.contacts import Contacts
+from spoc.pixels import Pixels
+from spoc.query_engine import Anchor
+from spoc.query_engine import Overlap
+
+
+@pytest.fixture(name="example_2d_df")
+def example_2d_df_fixture():
+ """Example 2d contacts"""
+ return pd.DataFrame(
+ {
+ "chrom_1": ["chr1", "chr1", "chr1", "chr1"],
+ "start_1": [100, 100, 750, 400],
+ "end_1": [200, 200, 780, 500],
+ "mapping_quality_1": [30, 30, 30, 30],
+ "align_score_1": [100, 100, 100, 100],
+ "align_base_qscore_1": [100, 100, 100, 100],
+ "chrom_2": ["chr1", "chr1", "chr1", "chr1"],
+ "start_2": [300, 100, 300, 750],
+ "end_2": [400, 200, 400, 780],
+ "mapping_quality_2": [30, 30, 30, 30],
+ "align_score_2": [100, 100, 100, 100],
+ "align_base_qscore_2": [100, 100, 100, 100],
+ # read name serves as id
+ "read_name": ["read1", "read2", "read3", "read4"],
+ "read_length": [100, 100, 100, 100],
+ },
+ )
+
+
+@pytest.fixture(name="pixel_dataframe")
+def pixel_dataframe_fixture():
+ """A dataframe containing pixels"""
+ return pd.DataFrame(
+ {
+ "chrom": ["chr1"] * 4,
+ "start_1": [180, 180, 750, 400],
+ "start_2": [300, 180, 300, 750],
+ "count": [1, 2, 3, 4], # contact count is id
+ },
+ )
+
+
+@pytest.fixture(name="single_region")
+def single_region_fixture():
+ """Single region"""
+ return pd.DataFrame(
+ {
+ "chrom": ["chr1"],
+ "start": [150],
+ "end": [200],
+ },
+ )
+
+
+@pytest.fixture(name="single_region_2")
+def single_region_2_fixture():
+ """Single region spanning chr1:700-800"""
+ return pd.DataFrame(
+ {
+ "chrom": ["chr1"],
+ "start": [700],
+ "end": [800],
+ },
+ )
+
+
+@pytest.fixture(name="single_region_3")
+def single_region_3_fixture():
+ """Single region spanning chr1:750-850"""
+ return pd.DataFrame(
+ {
+ "chrom": ["chr1"],
+ "start": [750],
+ "end": [850],
+ },
+ )
+
+
+@pytest.fixture(name="multi_region")
+def multi_region_fixture():
+ """Multi region"""
+ return pd.DataFrame(
+ {
+ "chrom": ["chr1", "chr1"],
+ "start": [150, 700],
+ "end": [200, 800],
+ },
+ )
+
+
+@pytest.fixture(name="multi_region_2")
+def multi_region_2_fixture():
+ """Two overlapping regions on chr1 (150-200 and 180-220)"""
+ return pd.DataFrame(
+ {
+ "chrom": ["chr1", "chr1"],
+ "start": [150, 180],
+ "end": [200, 220],
+ },
+ )
+
+
+@pytest.fixture(name="contacts_without_regions")
+def contacts_without_regions_fixture(example_2d_df):
+ """Example 2d contacts"""
+ return Contacts(example_2d_df)
+
+
+@pytest.fixture(name="pixels_without_regions")
+def pixels_wihtout_regions_fixture(pixel_dataframe):
+ """Pixels without regions"""
+ return Pixels(pixel_dataframe, number_fragments=2, binsize=10)
+
+
+@pytest.fixture(name="contacts_with_single_region")
+def contacts_with_single_region_fixture(contacts_without_regions, single_region):
+ """Contacts with single region"""
+ return Overlap(single_region, anchor_mode=Anchor(mode="ANY"))(
+ contacts_without_regions,
+ )
+
+
+@pytest.fixture(name="contacts_with_multiple_regions")
+def contacts_with_multiple_regions_fixture(contacts_without_regions, multi_region):
+ """Contacts with multiple regions"""
+ return Overlap(multi_region, anchor_mode=Anchor(mode="ANY"))(
+ contacts_without_regions,
+ )
+
+
+@pytest.fixture(name="pixels_with_single_region")
+def pixels_with_single_region_fixture(pixels_without_regions, single_region):
+ """Pixels with single region"""
+ return Overlap(single_region, anchor_mode=Anchor(mode="ANY"))(
+ pixels_without_regions,
+ )
+
+
+@pytest.fixture(name="pixels_with_multiple_regions")
+def pixels_with_multiple_regions_fixture(pixels_without_regions, multi_region):
+ """Pixels with multiple regions"""
+ return Overlap(multi_region, anchor_mode=Anchor(mode="ANY"))(pixels_without_regions)
diff --git a/tests/query_engine/test_contact_selection.py b/tests/query_engine/test_contact_selection.py
new file mode 100644
index 0000000..daae42d
--- /dev/null
+++ b/tests/query_engine/test_contact_selection.py
@@ -0,0 +1,300 @@
+"""Tests for contact selection"""
+import dask.dataframe as dd
+import duckdb
+import pytest
+
+from spoc.contacts import Contacts
+from spoc.io import DUCKDB_CONNECTION
+from spoc.query_engine import Anchor
+from spoc.query_engine import Overlap
+from spoc.query_engine import Query
+
+
+@pytest.fixture(name="example_2d_contacts_pandas")
+def example_2d_contacts_pandas_fixture(example_2d_df):
+ """Example 2d contacts"""
+ return Contacts(example_2d_df)
+
+
+@pytest.fixture(name="example_2d_contacts_dask")
+def example_2d_contacts_dask_fixture(example_2d_df):
+ """Example 2d contacts"""
+ return Contacts(dd.from_pandas(example_2d_df, npartitions=2))
+
+
+@pytest.fixture(name="example_2d_contacts_duckdb")
+def example_2d_contacts_duckdb_fixture(example_2d_df):
+ """Example 2d contacts"""
+ return Contacts(duckdb.from_df(example_2d_df, connection=DUCKDB_CONNECTION))
+
+
+# happy path
+
+
+@pytest.mark.parametrize(
+ "contact_fixture",
+ [
+ "example_2d_contacts_pandas",
+ "example_2d_contacts_duckdb",
+ ],
+)
+def test_no_filter_returns_all_contacts(contact_fixture, request):
+ """Test that no filter returns all contacts"""
+ contacts = request.getfixturevalue(contact_fixture)
+ query = Query(query_steps=[])
+ result = query.build(contacts)
+ assert result.compute().shape[0] == 4
+
+
+@pytest.mark.parametrize(
+ "contact_fixture",
+ [
+ "example_2d_contacts_pandas",
+ "example_2d_contacts_dask",
+ "example_2d_contacts_duckdb",
+ ],
+)
+def test_any_anchor_region_returns_correct_contacts(
+ contact_fixture, single_region, request
+):
+ """Test that any anchor region returns correct contacts"""
+ # setup
+ contacts = request.getfixturevalue(contact_fixture)
+ query_plan = [Overlap(regions=single_region, anchor_mode=Anchor(mode="ANY"))]
+ # execution
+ query = Query(query_steps=query_plan)
+ result = query.build(contacts)
+ # test
+ assert result.compute().shape[0] == 2
+ assert sorted(result.compute().read_name.tolist()) == sorted(["read1", "read2"])
+
+
+@pytest.mark.parametrize(
+ "contact_fixture",
+ [
+ "example_2d_contacts_pandas",
+ "example_2d_contacts_dask",
+ "example_2d_contacts_duckdb",
+ ],
+)
+def test_all_anchor_regions_returns_correct_contacts(
+ contact_fixture, single_region, request
+):
+ """Test that all anchor regions returns correct contacts"""
+ # setup
+ contacts = request.getfixturevalue(contact_fixture)
+ query_plan = [Overlap(regions=single_region, anchor_mode=Anchor(mode="ALL"))]
+ # execution
+ query = Query(query_steps=query_plan)
+ result = query.build(contacts)
+ # test
+ assert result.compute().shape[0] == 1
+ assert sorted(result.compute().read_name.tolist()) == sorted(["read2"])
+
+
+@pytest.mark.parametrize(
+ "contact_fixture,anchors,expected_reads",
+ [
+ (source_data, anchors, expected_reads)
+ for source_data, anchors, expected_reads in zip(
+ [
+ "example_2d_contacts_pandas",
+ "example_2d_contacts_dask",
+ "example_2d_contacts_duckdb",
+ ]
+ * 3,
+ [[1]] * 3 + [[2]] * 3 + [[1, 2]] * 3,
+ [["read3"]] * 3 + [["read4"]] * 3 + [[]] * 3,
+ )
+ ],
+)
+def test_specific_anchor_regions_returns_correct_contacts(
+ contact_fixture, anchors, expected_reads, single_region_2, request
+):
+ """Test that specific anchor regions returns correct contacts"""
+ # setup
+ contacts = request.getfixturevalue(contact_fixture)
+ query_plan = [
+ Overlap(
+ regions=single_region_2, anchor_mode=Anchor(mode="ALL", anchors=anchors)
+ )
+ ]
+ # execution
+ query = Query(query_steps=query_plan)
+ result = query.build(contacts)
+ # test
+ assert result.compute().shape[0] == len(expected_reads)
+ assert sorted(result.compute().read_name.tolist()) == sorted(expected_reads)
+
+
+@pytest.mark.parametrize(
+ "contact_fixture,anchors,expected_reads",
+ [
+ (source_data, anchors, expected_reads)
+ for source_data, anchors, expected_reads in zip(
+ [
+ "example_2d_contacts_pandas",
+ "example_2d_contacts_dask",
+ "example_2d_contacts_duckdb",
+ ]
+ * 3,
+ [[1]] * 3 + [[2]] * 3 + [[1, 2]] * 3,
+ [["read3"]] * 3 + [["read4"]] * 3 + [[]] * 3,
+ )
+ ],
+)
+def test_specific_anchor_regions_returns_correct_contacts_point_region(
+ contact_fixture, anchors, expected_reads, single_region_3, request
+):
+ """Test that specific anchor regions returns correct contacts, when
+ the region is a point region and windowsize is passed"""
+ # setup
+ contacts = request.getfixturevalue(contact_fixture)
+ query_plan = [
+ Overlap(
+ regions=single_region_3,
+ anchor_mode=Anchor(mode="ALL", anchors=anchors),
+ half_window_size=50,
+ )
+ ]
+ # execution
+ query = Query(query_steps=query_plan)
+ result = query.build(contacts)
+ # test
+ assert result.compute().shape[0] == len(expected_reads)
+ assert sorted(result.compute().read_name.tolist()) == sorted(expected_reads)
+ assert result.get_schema().get_half_window_size() == 50
+
+
+@pytest.mark.parametrize(
+ "contact_fixture",
+ [
+ "example_2d_contacts_pandas",
+ "example_2d_contacts_dask",
+ "example_2d_contacts_duckdb",
+ ],
+)
+def test_any_anchor_region_returns_correct_contacts_multi_region(
+ contact_fixture, multi_region, request
+):
+ """Test that any anchor region returns correct contacts"""
+ # setup
+ contacts = request.getfixturevalue(contact_fixture)
+ query_plan = [Overlap(regions=multi_region, anchor_mode=Anchor(mode="ANY"))]
+ # execution
+ query = Query(query_steps=query_plan)
+ result = query.build(contacts)
+ # test
+ assert result.compute().shape[0] == 4
+ assert sorted(result.compute().read_name.tolist()) == sorted(
+ ["read1", "read2", "read3", "read4"]
+ )
+
+
+@pytest.mark.parametrize(
+ "contact_fixture",
+ [
+ "example_2d_contacts_pandas",
+ "example_2d_contacts_dask",
+ "example_2d_contacts_duckdb",
+ ],
+)
+def test_all_anchor_regions_returns_correct_contacts_multi_region(
+ contact_fixture, multi_region, request
+):
+ """Test that all anchor regions returns correct contacts"""
+ # setup
+ contacts = request.getfixturevalue(contact_fixture)
+ query_plan = [Overlap(regions=multi_region, anchor_mode=Anchor(mode="ALL"))]
+ # execution
+ query = Query(query_steps=query_plan)
+ result = query.build(contacts)
+ # test
+ assert result.compute().shape[0] == 1
+ assert sorted(result.compute().read_name.tolist()) == sorted(["read2"])
+
+
+@pytest.mark.parametrize(
+ "contact_fixture",
+ [
+ "example_2d_contacts_pandas",
+ "example_2d_contacts_dask",
+ "example_2d_contacts_duckdb",
+ ],
+)
+def test_contacts_duplicated_for_multiple_overlapping_regions(
+ contact_fixture, multi_region_2, request
+):
+ """
+ This test verifies that when multiple overlapping regions are specified as anchor regions,
+ the query returns duplicated contacts for each overlapping region.
+ """
+ # setup
+ contacts = request.getfixturevalue(contact_fixture)
+ query_plan = [Overlap(regions=multi_region_2, anchor_mode=Anchor(mode="ALL"))]
+ # execution
+ query = Query(query_steps=query_plan)
+ result = query.build(contacts)
+ # test
+ assert result.compute().shape[0] == 2
+ assert sorted(result.compute().read_name.tolist()) == sorted(["read2", "read2"])
+
+
+@pytest.mark.parametrize(
+ "contact_fixture,anchors,expected_reads",
+ [
+ (source_data, anchors, expected_reads)
+ for source_data, anchors, expected_reads in zip(
+ [
+ "example_2d_contacts_pandas",
+ "example_2d_contacts_dask",
+ "example_2d_contacts_duckdb",
+ ]
+ * 3,
+ [[1]] * 3 + [[2]] * 3 + [[1, 2]] * 3,
+ [["read1", "read2", "read3"]] * 3
+ + [["read2", "read4"]] * 3
+ + [["read2"]] * 3,
+ )
+ ],
+)
+def test_specific_anchor_regions_returns_correct_contacts_multi_region(
+ contact_fixture, anchors, expected_reads, multi_region, request
+):
+ """Test that specific anchor regions returns correct contacts"""
+ # setup
+ contacts = request.getfixturevalue(contact_fixture)
+ query_plan = [
+ Overlap(regions=multi_region, anchor_mode=Anchor(mode="ALL", anchors=anchors))
+ ]
+ # execution
+ query = Query(query_steps=query_plan)
+ result = query.build(contacts)
+ # test
+ assert result.compute().shape[0] == len(expected_reads)
+ assert sorted(result.compute().read_name.tolist()) == sorted(expected_reads)
+
+
+# validation problems
+
+
+@pytest.mark.parametrize(
+ "contact_fixture",
+ [
+ "example_2d_contacts_pandas",
+ "example_2d_contacts_dask",
+ "example_2d_contacts_duckdb",
+ ],
+)
+def test_specific_anchor_region_not_in_contacts_raises_error(
+ contact_fixture, single_region, request
+):
+ """Test that specific anchor region not in contacts raises error"""
+ # setup
+ contacts = request.getfixturevalue(contact_fixture)
+ query_plan = [
+ Overlap(regions=single_region, anchor_mode=Anchor(mode="ALL", anchors=[3]))
+ ]
+ with pytest.raises(ValueError):
+ query = Query(query_steps=query_plan)
+ query.build(contacts)
diff --git a/tests/query_engine/test_distance_aggregation.py b/tests/query_engine/test_distance_aggregation.py
new file mode 100644
index 0000000..c77ae86
--- /dev/null
+++ b/tests/query_engine/test_distance_aggregation.py
@@ -0,0 +1,502 @@
+"""Tests for the aggregation functions in the query engine."""
+from itertools import product
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from spoc.pixels import Pixels
+from spoc.query_engine import AggregationFunction
+from spoc.query_engine import Anchor
+from spoc.query_engine import DistanceAggregation
+from spoc.query_engine import DistanceMode
+from spoc.query_engine import DistanceTransformation
+from spoc.query_engine import Overlap
+from spoc.query_engine import Query
+
+
+@pytest.fixture(name="pixels_with_distance")
+def pixels_with_distance_fixture(pixels_with_single_region):
+ """Pixels with single region and distance transformation applied"""
+ return DistanceTransformation()(pixels_with_single_region)
+
+
+@pytest.fixture(name="complete_synthetic_pixels_df")
+def complete_synthetic_pixels_df_fixture():
+ """Pixels that span two regions densely"""
+ np.random.seed(42)
+ # genomic region_1
+ pixels_1 = [
+ {
+ "chrom": tup[0],
+ "start_1": tup[1],
+ "start_2": tup[2],
+ "start_3": tup[3],
+ "count": np.random.randint(0, 10),
+ }
+ for tup in product(
+ ["chr1"],
+ np.arange(900_000, 1_150_000, 50_000),
+ np.arange(900_000, 1_150_000, 50_000),
+ np.arange(900_000, 1_150_000, 50_000),
+ )
+ ]
+ # genomic region_2
+ pixels_2 = [
+ {
+ "chrom": tup[0],
+ "start_1": tup[1],
+ "start_2": tup[2],
+ "start_3": tup[3],
+ "count": np.random.randint(0, 10),
+ }
+ for tup in product(
+ ["chr2"],
+ np.arange(900_000, 1_150_000, 50_000),
+ np.arange(900_000, 1_150_000, 50_000),
+ np.arange(900_000, 1_150_000, 50_000),
+ )
+ ]
+ return pd.concat((pd.DataFrame(pixels_1), pd.DataFrame(pixels_2)))
+
+
+@pytest.fixture(name="incomplete_synthetic_pixels_df")
+def incomplete_synthetic_pixels_df_fixture():
+ """Pixels that span two regions sparsely"""
+ np.random.seed(42)
+ # genomic region 1
+ pixels_1 = [
+ {
+ "chrom": tup[0],
+ "start_1": tup[1],
+ "start_2": tup[2],
+ "start_3": tup[3],
+ "count": np.random.randint(0, 10),
+ }
+ for tup in product(
+ ["chr1"],
+ np.arange(900_000, 1_000_000, 50_000),
+ np.arange(900_000, 1_000_000, 50_000),
+ np.arange(900_000, 1_000_000, 50_000),
+ )
+ ]
+ # genomic region_2
+ pixels_2 = [
+ {
+ "chrom": tup[0],
+ "start_1": tup[1],
+ "start_2": tup[2],
+ "start_3": tup[3],
+ "count": np.random.randint(0, 10),
+ }
+ for tup in product(
+ ["chr2"],
+ np.arange(1_000_000, 1_150_000, 50_000),
+ np.arange(1_000_000, 1_150_000, 50_000),
+ np.arange(1_000_000, 1_150_000, 50_000),
+ )
+ ]
+ return pd.concat((pd.DataFrame(pixels_1), pd.DataFrame(pixels_2)))
+
+
+@pytest.fixture(name="incomplete_synthetic_pixels_dense_df")
+def incomplete_synthetic_pixels_dense_df_fixture(
+ complete_synthetic_pixels_df, incomplete_synthetic_pixels_df
+):
+ """Pixels that span two regions sparsely
+ with missing pixels filled with 0."""
+ return incomplete_synthetic_pixels_df.merge(
+ complete_synthetic_pixels_df[["chrom", "start_1", "start_2", "start_3"]],
+ on=["chrom", "start_1", "start_2", "start_3"],
+ how="outer",
+ ).fillna(0)
+
+
+@pytest.fixture
+def single_region():
+ """Single region"""
+ return pd.DataFrame(
+ {
+ "chrom": ["chr1"],
+ "start": [900_000],
+ "end": [1_100_000],
+ },
+ index=[0],
+ )
+
+
+@pytest.fixture
+def single_region_not_binaligned():
+ """Single region that is not aligned to the bin size"""
+ return pd.DataFrame(
+ {
+ "chrom": ["chr1"],
+ "start": [920_000],
+ "end": [1_120_000],
+ },
+ index=[0],
+ )
+
+
+@pytest.fixture
+def two_regions():
+ """Two regions"""
+ return pd.DataFrame(
+ {
+ "chrom": ["chr1", "chr2"],
+ "start": [900_000, 900_000],
+ "end": [1_100_000, 1_100_000],
+ }
+ )
+
+
+@pytest.mark.parametrize(
+ "genomic_data_fixture",
+ [
+ "contacts_without_regions",
+ "pixels_without_regions",
+ "contacts_with_single_region",
+ "contacts_with_multiple_regions",
+ "pixels_with_single_region",
+ "pixels_with_multiple_regions",
+ ],
+)
+def test_input_wo_distance_rejected(genomic_data_fixture, request):
+ """Test that the validation fails for incorrect inputs."""
+ genomic_data = request.getfixturevalue(genomic_data_fixture)
+ with pytest.raises(ValueError):
+ query = Query(
+ query_steps=[
+ DistanceAggregation(
+ value_column="value", function=AggregationFunction.COUNT
+ ),
+ ],
+ )
+ query.build(genomic_data)
+
+
+def test_input_wo_data_column_rejected(pixels_with_distance):
+ """Test that the validation fails for incorrect inputs."""
+ with pytest.raises(ValueError):
+ query = Query(
+ query_steps=[
+ DistanceAggregation(
+ value_column="test_column_does_no_exist",
+ function=AggregationFunction.AVG,
+ ),
+ ],
+ )
+ query.build(pixels_with_distance)
+
+
+def test_contacts_rejected(contacts_with_single_region):
+ """Test that the validation fails for incorrect inputs."""
+ with pytest.raises(ValueError):
+ query = Query(
+ query_steps=[
+ DistanceAggregation(
+ value_column="value", function=AggregationFunction.COUNT
+ ),
+ ],
+ )
+ query.build(contacts_with_single_region)
+
+
+@pytest.mark.parametrize(
+ "aggregation_spoc, aggregation_pandas, region_fixture",
+ [
+ (AggregationFunction.COUNT, "count", "single_region"),
+ (AggregationFunction.COUNT, "count", "two_regions"),
+ (AggregationFunction.COUNT, "count", "single_region_not_binaligned"),
+ ("COUNT", "count", "single_region_not_binaligned"),
+ (AggregationFunction.SUM, "sum", "single_region"),
+ (AggregationFunction.SUM, "sum", "two_regions"),
+ (AggregationFunction.SUM, "sum", "single_region_not_binaligned"),
+ ("SUM", "sum", "single_region_not_binaligned"),
+ (AggregationFunction.AVG, "mean", "single_region"),
+ (AggregationFunction.AVG, "mean", "two_regions"),
+ (AggregationFunction.AVG, "mean", "single_region_not_binaligned"),
+ ("AVG", "mean", "single_region_not_binaligned"),
+ ],
+)
+def test_aggregations_on_dense_input(
+ complete_synthetic_pixels_df,
+ aggregation_spoc,
+ aggregation_pandas,
+ region_fixture,
+ request,
+):
+ """Test aggregations on dense input."""
+ # setup (pixels here are points to make the test easier)
+ pixels = Pixels(complete_synthetic_pixels_df, binsize=50_000, number_fragments=3)
+ region = request.getfixturevalue(region_fixture)
+ mapped_pixels = Query(
+ query_steps=[
+ Overlap(region, anchor_mode=Anchor(mode="ANY")),
+ DistanceTransformation(distance_mode=DistanceMode.LEFT),
+ ],
+ ).build(pixels)
+ mapped_pixels_df = mapped_pixels.compute()
+ cat_dtype = pd.CategoricalDtype(range(-100_000, 150_000, 50_000))
+ mapped_pixels_df["distance_1"] = mapped_pixels_df["distance_1"].astype(cat_dtype)
+ mapped_pixels_df["distance_2"] = mapped_pixels_df["distance_2"].astype(cat_dtype)
+ mapped_pixels_df["distance_3"] = mapped_pixels_df["distance_3"].astype(cat_dtype)
+ expected_aggregation = (
+ mapped_pixels_df.groupby(["distance_1", "distance_2", "distance_3"])
+ .agg(count=("count", aggregation_pandas))
+ .astype(float)
+ .reset_index()
+ .rename(columns={"count": f"count_{aggregation_pandas}"})
+ .sort_values(["distance_1", "distance_2", "distance_3"])
+ )
+ # execute aggregation
+ query = Query(
+ query_steps=[
+ DistanceAggregation(
+ value_column="count", function=aggregation_spoc, densify_output=False
+ ),
+ ],
+ )
+ actual_aggregation = query.build(mapped_pixels).compute()
+ # test
+ np.testing.assert_array_almost_equal(
+ expected_aggregation.values,
+ actual_aggregation.values,
+ )
+
+
+@pytest.mark.parametrize(
+ "aggregation_spoc, aggregation_pandas, region_fixture",
+ [
+ (AggregationFunction.COUNT, "count", "single_region"),
+ (AggregationFunction.COUNT, "count", "two_regions"),
+ (AggregationFunction.COUNT, "count", "single_region_not_binaligned"),
+ ("COUNT", "count", "single_region_not_binaligned"),
+ (AggregationFunction.SUM, "sum", "single_region"),
+ (AggregationFunction.SUM, "sum", "two_regions"),
+ (AggregationFunction.SUM, "sum", "single_region_not_binaligned"),
+ ("SUM", "sum", "single_region_not_binaligned"),
+ (AggregationFunction.AVG, "mean", "single_region"),
+ (AggregationFunction.AVG, "mean", "two_regions"),
+ (AggregationFunction.AVG, "mean", "single_region_not_binaligned"),
+ ("AVG", "mean", "single_region_not_binaligned"),
+ ],
+)
+def test_aggregations_on_dense_input_with_reduced_dimensionality(
+ complete_synthetic_pixels_df,
+ aggregation_spoc,
+ aggregation_pandas,
+ region_fixture,
+ request,
+):
+ """Test aggregations on dense input with reduced dimensionality."""
+ # setup (pixels here are points to make the test easier)
+ pixels = Pixels(complete_synthetic_pixels_df, binsize=50_000, number_fragments=3)
+ region = request.getfixturevalue(region_fixture)
+ mapped_pixels = Query(
+ query_steps=[
+ Overlap(region, anchor_mode=Anchor(mode="ANY")),
+ DistanceTransformation(distance_mode=DistanceMode.LEFT),
+ ],
+ ).build(pixels)
+ mapped_pixels_df = mapped_pixels.compute()
+ cat_dtype = pd.CategoricalDtype(range(-100_000, 150_000, 50_000))
+ mapped_pixels_df["distance_1"] = mapped_pixels_df["distance_1"].astype(cat_dtype)
+ mapped_pixels_df["distance_2"] = mapped_pixels_df["distance_2"].astype(cat_dtype)
+ expected_aggregation = (
+ mapped_pixels_df.groupby(["distance_1", "distance_2"])
+ .agg(count=("count", aggregation_pandas))
+ .astype(float)
+ .reset_index()
+ .rename(columns={"count": f"count_{aggregation_pandas}"})
+ .sort_values(["distance_1", "distance_2"])
+ )
+ # execute aggregation
+ query = Query(
+ query_steps=[
+ DistanceAggregation(
+ value_column="count",
+ function=aggregation_spoc,
+ densify_output=False,
+ position_list=[1, 2],
+ ),
+ ],
+ )
+ actual_aggregation = query.build(mapped_pixels).compute()
+ # test
+ np.testing.assert_array_almost_equal(
+ expected_aggregation.values,
+ actual_aggregation.values,
+ )
+
+
+# pylint: disable=too-many-arguments
+@pytest.mark.parametrize(
+ "aggregation_spoc, aggregation_pandas, region_fixture",
+ [
+ (AggregationFunction.COUNT, "count", "single_region"),
+ (AggregationFunction.COUNT, "count", "two_regions"),
+ (AggregationFunction.COUNT, "count", "single_region_not_binaligned"),
+ (AggregationFunction.SUM, "sum", "single_region"),
+ (AggregationFunction.SUM, "sum", "two_regions"),
+ (AggregationFunction.SUM, "sum", "single_region_not_binaligned"),
+ (AggregationFunction.AVG, "mean", "single_region"),
+ (AggregationFunction.AVG, "mean", "two_regions"),
+ (AggregationFunction.AVG, "mean", "single_region_not_binaligned"),
+ (AggregationFunction.AVG_WITH_EMPTY, "mean", "single_region"),
+ (AggregationFunction.AVG_WITH_EMPTY, "mean", "two_regions"),
+ (AggregationFunction.AVG_WITH_EMPTY, "mean", "single_region_not_binaligned"),
+ ],
+)
+def test_aggregations_on_sparse_input(
+ incomplete_synthetic_pixels_df,
+ incomplete_synthetic_pixels_dense_df,
+ aggregation_spoc,
+ aggregation_pandas,
+ region_fixture,
+ request,
+):
+ """Test aggregations on sparse input."""
+ # setup
+ incomplete_pixels = Pixels(
+ incomplete_synthetic_pixels_df, binsize=50_000, number_fragments=3
+ )
+ incomplete_dense_pixels = Pixels(
+ incomplete_synthetic_pixels_dense_df, binsize=50_000, number_fragments=3
+ )
+ query_plan = Query(
+ query_steps=[
+ Overlap(
+ request.getfixturevalue(region_fixture), anchor_mode=Anchor(mode="ANY")
+ ),
+ DistanceTransformation(distance_mode=DistanceMode.LEFT),
+ ],
+ )
+ mapped_pixels = query_plan.build(incomplete_pixels)
+ mapped_incomplete_dense_pixels_df = query_plan.build(
+ incomplete_dense_pixels
+ ).compute()
+ if aggregation_spoc == AggregationFunction.AVG_WITH_EMPTY:
+ # when we test the AVG_WITH_EMPTY function, we need to use the dense pixels
+ # where missing values with 0 count are filled in
+ pixel_frame_for_expected = mapped_incomplete_dense_pixels_df
+ else:
+ pixel_frame_for_expected = mapped_pixels.compute()
+ pixel_frame_for_expected["distance_1"] = pixel_frame_for_expected[
+ "distance_1"
+ ].astype(pd.CategoricalDtype(range(-100_000, 150_000, 50_000)))
+ pixel_frame_for_expected["distance_2"] = pixel_frame_for_expected[
+ "distance_2"
+ ].astype(pd.CategoricalDtype(range(-100_000, 150_000, 50_000)))
+ pixel_frame_for_expected["distance_3"] = pixel_frame_for_expected[
+ "distance_3"
+ ].astype(pd.CategoricalDtype(range(-100_000, 150_000, 50_000)))
+ expected_aggregation = (
+ pixel_frame_for_expected.groupby(["distance_1", "distance_2", "distance_3"])
+ .agg(count=("count", aggregation_pandas))
+ .astype(float)
+ .reset_index()
+ .rename(columns={"count": f"count_{aggregation_pandas}"})
+ .sort_values(["distance_1", "distance_2", "distance_3"])
+ )
+ # execute aggregation
+ query = Query(
+ query_steps=[
+ DistanceAggregation(
+ value_column="count", function=aggregation_spoc, densify_output=True
+ ),
+ ],
+ )
+ # test
+ np.testing.assert_array_almost_equal(
+ expected_aggregation.values, query.build(mapped_pixels).compute()
+ )
+
+
+# pylint: disable=too-many-arguments
+@pytest.mark.parametrize(
+ "aggregation_spoc, aggregation_pandas, region_fixture",
+ [
+ (AggregationFunction.COUNT, "count", "single_region"),
+ (AggregationFunction.COUNT, "count", "two_regions"),
+ (AggregationFunction.COUNT, "count", "single_region_not_binaligned"),
+ (AggregationFunction.SUM, "sum", "single_region"),
+ (AggregationFunction.SUM, "sum", "two_regions"),
+ (AggregationFunction.SUM, "sum", "single_region_not_binaligned"),
+ (AggregationFunction.AVG, "mean", "single_region"),
+ (AggregationFunction.AVG, "mean", "two_regions"),
+ (AggregationFunction.AVG, "mean", "single_region_not_binaligned"),
+ # for AVG_WITH_EMPTY, we need to sum up and divide by the number of regions;
+ # just taking the mean would average with respect to every triplet pixel
+ (AggregationFunction.AVG_WITH_EMPTY, lambda s: s.sum(), "single_region"),
+ (AggregationFunction.AVG_WITH_EMPTY, lambda s: s.sum() / 2, "two_regions"),
+ (
+ AggregationFunction.AVG_WITH_EMPTY,
+ lambda s: s.sum(),
+ "single_region_not_binaligned",
+ ),
+ ],
+)
+def test_aggregations_on_sparse_input_with_reduced_dimensionality(
+ incomplete_synthetic_pixels_df,
+ incomplete_synthetic_pixels_dense_df,
+ aggregation_spoc,
+ aggregation_pandas,
+ region_fixture,
+ request,
+):
+ """Test aggregation on sparse input with reduced dimensionality."""
+ # setup
+ incomplete_pixels = Pixels(
+ incomplete_synthetic_pixels_df, binsize=50_000, number_fragments=3
+ )
+ incomplete_dense_pixels = Pixels(
+ incomplete_synthetic_pixels_dense_df, binsize=50_000, number_fragments=3
+ )
+ query_plan = Query(
+ query_steps=[
+ Overlap(
+ request.getfixturevalue(region_fixture), anchor_mode=Anchor(mode="ANY")
+ ),
+ DistanceTransformation(distance_mode=DistanceMode.LEFT),
+ ],
+ )
+ mapped_pixels = query_plan.build(incomplete_pixels)
+ mapped_incomplete_dense_pixels_df = query_plan.build(
+ incomplete_dense_pixels
+ ).compute()
+ if aggregation_spoc == AggregationFunction.AVG_WITH_EMPTY:
+ # when we test the AVG_WITH_EMPTY function, we need to use the dense pixels
+ # where missing values with 0 count are filled in
+ pixel_frame_for_expected = mapped_incomplete_dense_pixels_df
+ else:
+ pixel_frame_for_expected = mapped_pixels.compute()
+ pixel_frame_for_expected["distance_1"] = pixel_frame_for_expected[
+ "distance_1"
+ ].astype(pd.CategoricalDtype(range(-100_000, 150_000, 50_000)))
+ pixel_frame_for_expected["distance_2"] = pixel_frame_for_expected[
+ "distance_2"
+ ].astype(pd.CategoricalDtype(range(-100_000, 150_000, 50_000)))
+ expected_aggregation = (
+ pixel_frame_for_expected.groupby(["distance_1", "distance_2"])
+ .agg(count=("count", aggregation_pandas))
+ .astype(float)
+ .reset_index()
+ .rename(columns={"count": f"count_{aggregation_pandas}"})
+ .sort_values(["distance_1", "distance_2"])
+ )
+ # execute aggregation
+ query = Query(
+ query_steps=[
+ DistanceAggregation(
+ value_column="count",
+ function=aggregation_spoc,
+ densify_output=True,
+ position_list=[1, 2],
+ ),
+ ],
+ )
+ # test
+ np.testing.assert_array_almost_equal(
+ expected_aggregation.values, query.build(mapped_pixels).compute()
+ )
diff --git a/tests/query_engine/test_distance_transformation.py b/tests/query_engine/test_distance_transformation.py
new file mode 100644
index 0000000..2278006
--- /dev/null
+++ b/tests/query_engine/test_distance_transformation.py
@@ -0,0 +1,125 @@
+"""Tests for transformations"""
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from spoc.query_engine import DistanceMode
+from spoc.query_engine import DistanceTransformation
+from spoc.query_engine import Query
+
+
+@pytest.mark.parametrize(
+    "genomic_data_fixture",
+    ["contacts_without_regions", "pixels_without_regions"],
+)
+def test_incompatible_input_rejected(genomic_data_fixture, request):
+    """Check that building a distance query on incompatible input fails.
+
+    The fixtures are named ``*_without_regions``; building a MIDPOINT
+    distance query on them has to raise a ValueError. The query is assembled
+    outside of the ``pytest.raises`` block so that only ``build`` counts.
+    """
+    genomic_data = request.getfixturevalue(genomic_data_fixture)
+    # Constructing this exact query succeeds in the other tests of this module,
+    # so a ValueError is only acceptable when it comes from the build step.
+    query = Query(query_steps=[DistanceTransformation(DistanceMode.MIDPOINT)])
+    with pytest.raises(ValueError):
+        query.build(genomic_data)
+
+
+@pytest.mark.parametrize(
+    "genomic_data_fixture",
+    ["contacts_with_single_region", "contacts_with_multiple_regions"],
+)
+def test_distance_calculated_correctly_contacts(genomic_data_fixture, request):
+    """Compare MIDPOINT distances of contacts with a manual recomputation.
+
+    The expected distance of each position is its floor-divided midpoint
+    minus the floor-divided midpoint of the region.
+    """
+    contacts = request.getfixturevalue(genomic_data_fixture)
+    steps = [DistanceTransformation(DistanceMode.MIDPOINT)]
+    frame = Query(query_steps=steps).build(contacts).compute()
+    # (start column, end column, distance column) for both positions
+    columns = (
+        ("start_1", "end_1", "distance_1"),
+        ("start_2", "end_2", "distance_2"),
+    )
+    # reference point that both positions are measured against
+    region_center = (frame["region_start"] + frame["region_end"]) // 2
+    # recompute every midpoint up front, then compare position by position
+    midpoints = [(frame[start] + frame[end]) // 2 for start, end, _ in columns]
+    for midpoint, (_, _, distance) in zip(midpoints, columns):
+        expected = midpoint - region_center
+        assert np.allclose(expected, frame[distance])
+
+
+@pytest.mark.parametrize(
+    "genomic_data_fixture",
+    ["pixels_with_single_region", "pixels_with_multiple_regions"],
+)
+def test_distance_midpoint_rejected_pixels(genomic_data_fixture, request):
+    """Check that a MIDPOINT distance query cannot be built on pixels.
+
+    Building the query on either pixel fixture has to raise a ValueError.
+    """
+    pixels = request.getfixturevalue(genomic_data_fixture)
+    midpoint_step = DistanceTransformation(DistanceMode.MIDPOINT)
+    midpoint_query = Query(query_steps=[midpoint_step])
+    with pytest.raises(ValueError):
+        midpoint_query.build(pixels)
+
+
+@pytest.mark.parametrize(
+    "genomic_data_fixture",
+    ["pixels_with_single_region", "pixels_with_multiple_regions"],
+)
+def test_distance_pixels(genomic_data_fixture, request):
+    """Compare LEFT distances of pixels with a manual recomputation.
+
+    With ``DistanceMode.LEFT`` the expected distance of each position is its
+    start coordinate minus the floor-divided midpoint of the region.
+    """
+    pixels = request.getfixturevalue(genomic_data_fixture)
+    steps = [DistanceTransformation(DistanceMode.LEFT)]
+    frame = Query(query_steps=steps).build(pixels).compute()
+    # (start column, distance column) for both positions
+    columns = (
+        ("start_1", "distance_1"),
+        ("start_2", "distance_2"),
+    )
+    # reference point that both positions are measured against
+    region_center = (frame["region_start"] + frame["region_end"]) // 2
+    # look up every left edge up front, then compare position by position
+    left_edges = [frame[start] for start, _ in columns]
+    for left_edge, (_, distance) in zip(left_edges, columns):
+        expected = left_edge - region_center
+        assert np.allclose(expected, frame[distance])
+
+
+@pytest.mark.parametrize(
+    "genomic_data_fixture",
+    ["pixels_with_single_region", "pixels_with_multiple_regions"],
+)
+def test_distance_pixels_left_string_arg(genomic_data_fixture, request):
+    """Run the LEFT distance check with the mode passed as the string "left".
+
+    The expected distance of each position is its start coordinate minus
+    the floor-divided midpoint of the region.
+    """
+    pixels = request.getfixturevalue(genomic_data_fixture)
+    steps = [DistanceTransformation("left")]
+    frame = Query(query_steps=steps).build(pixels).compute()
+    # (start column, distance column) for both positions
+    columns = (
+        ("start_1", "distance_1"),
+        ("start_2", "distance_2"),
+    )
+    # reference point that both positions are measured against
+    region_center = (frame["region_start"] + frame["region_end"]) // 2
+    # look up every left edge up front, then compare position by position
+    left_edges = [frame[start] for start, _ in columns]
+    for left_edge, (_, distance) in zip(left_edges, columns):
+        expected = left_edge - region_center
+        assert np.allclose(expected, frame[distance])
diff --git a/tests/query_engine/test_pixel_selection.py b/tests/query_engine/test_pixel_selection.py
new file mode 100644
index 0000000..c0c84e4
--- /dev/null
+++ b/tests/query_engine/test_pixel_selection.py
@@ -0,0 +1,269 @@
+"""Tests for the pixel selection"""
+import dask.dataframe as dd
+import duckdb
+import numpy as np
+import pytest
+
+from spoc.io import DUCKDB_CONNECTION
+from spoc.pixels import Pixels
+from spoc.query_engine import Anchor
+from spoc.query_engine import Overlap
+from spoc.query_engine import Query
+
+
+@pytest.fixture(name="pixels_dask")
+def pixels_dask_fixture(pixel_dataframe):
+    """Pixels (2 fragments, binsize 10) backed by a dask dataframe.
+
+    The ``pixel_dataframe`` fixture is split into two partitions.
+    """
+    dask_frame = dd.from_pandas(pixel_dataframe, npartitions=2)
+    return Pixels(dask_frame, number_fragments=2, binsize=10)
+
+
+@pytest.fixture(name="pixels_pandas")
+def pixels_pandas_fixture(pixel_dataframe):
+    """Pixels (2 fragments, binsize 10) wrapping ``pixel_dataframe`` directly."""
+    return Pixels(pixel_dataframe, binsize=10, number_fragments=2)
+
+
+@pytest.fixture(name="pixels_duckdb")
+def pixels_duckdb_fixture(pixel_dataframe):
+    """Pixels (2 fragments, binsize 10) backed by a duckdb object.
+
+    The object is created from ``pixel_dataframe`` on DUCKDB_CONNECTION.
+    """
+    relation = duckdb.from_df(pixel_dataframe, DUCKDB_CONNECTION)
+    return Pixels(relation, number_fragments=2, binsize=10)
+
+
+# happy path
+
+
+@pytest.mark.parametrize(
+    "pixels_fixture",
+    ["pixels_pandas", "pixels_duckdb"],
+)
+def test_no_filter_returns_all_pixels(pixels_fixture, request):
+    """Check that a query without steps hands back every pixel.
+
+    The assertion expects the computed result to hold 4 rows.
+    """
+    source_pixels = request.getfixturevalue(pixels_fixture)
+    unfiltered = Query(query_steps=[]).build(source_pixels)
+    row_count, *_ = unfiltered.compute().shape
+    assert row_count == 4
+
+
+@pytest.mark.parametrize(
+    "pixels_fixture",
+    [
+        "pixels_pandas",
+        "pixels_dask",
+        "pixels_duckdb",
+    ],
+)
+def test_any_anchor_region_returns_correct_pixels(
+    pixels_fixture, single_region, request
+):
+    """Check the pixels selected by an ``ANY`` overlap with a single region.
+
+    Two pixels (counts 1 and 2) are expected, and every selected pixel has
+    to carry end columns that lie 10 (the fixture binsize) after the starts.
+    """
+    pixels = request.getfixturevalue(pixels_fixture)
+    overlap = Overlap(regions=single_region, anchor_mode=Anchor(mode="ANY"))
+    # execute the query once and run all assertions on the same frame
+    frame = Query(query_steps=[overlap]).build(pixels).compute()
+    assert frame.shape[0] == 2
+    assert sorted(frame["count"].tolist()) == [1, 2]
+    assert np.allclose(frame["start_1"] + 10, frame["end_1"])
+    assert np.allclose(frame["start_2"] + 10, frame["end_2"])
+
+
+@pytest.mark.parametrize(
+    "pixels_fixture",
+    [
+        "pixels_pandas",
+        "pixels_dask",
+        "pixels_duckdb",
+    ],
+)
+def test_all_anchor_regions_returns_correct_pixels(
+    pixels_fixture, single_region, request
+):
+    """Check the pixels selected by an ``ALL`` overlap with a single region.
+
+    Only the pixel with count 2 is expected to be returned.
+    """
+    pixels = request.getfixturevalue(pixels_fixture)
+    overlap = Overlap(regions=single_region, anchor_mode=Anchor(mode="ALL"))
+    # execute the query once and run both assertions on the same frame
+    frame = Query(query_steps=[overlap]).build(pixels).compute()
+    assert frame.shape[0] == 1
+    assert sorted(frame["count"].tolist()) == [2]
+
+
+@pytest.mark.parametrize(
+    "pixel_fixture,anchors,expected_reads",
+    [
+        # every anchor selection is paired with every pixel backend, in the
+        # order anchors-major / backend-minor
+        (source_data, anchors, expected_reads)
+        for anchors, expected_reads in [
+            ([1], [3]),
+            ([2], [4]),
+            ([1, 2], []),
+        ]
+        for source_data in [
+            "pixels_pandas",
+            "pixels_dask",
+            "pixels_duckdb",
+        ]
+    ],
+)
+def test_specific_anchor_regions_returns_correct_pixels(
+    pixel_fixture, anchors, expected_reads, single_region_2, request
+):
+    """Check an ``ALL`` overlap that is restricted to specific anchors.
+
+    ``expected_reads`` lists the counts of the pixels that must be returned
+    for the given ``anchors``; an empty list means no pixel is selected.
+    """
+    pixels = request.getfixturevalue(pixel_fixture)
+    anchor_mode = Anchor(mode="ALL", anchors=anchors)
+    overlap = Overlap(regions=single_region_2, anchor_mode=anchor_mode)
+    # execute the query once and run both assertions on the same frame
+    frame = Query(query_steps=[overlap]).build(pixels).compute()
+    assert frame.shape[0] == len(expected_reads)
+    assert sorted(frame["count"].tolist()) == sorted(expected_reads)
+
+
+@pytest.mark.parametrize(
+    "pixels_fixture",
+    [
+        "pixels_pandas",
+        "pixels_dask",
+        "pixels_duckdb",
+    ],
+)
+def test_any_anchor_region_returns_correct_pixels_multi_region(
+    pixels_fixture, multi_region, request
+):
+    """Check the pixels selected by an ``ANY`` overlap with multiple regions.
+
+    Four rows with the counts 1, 2, 3 and 4 are expected.
+    """
+    pixels = request.getfixturevalue(pixels_fixture)
+    overlap = Overlap(regions=multi_region, anchor_mode=Anchor(mode="ANY"))
+    # execute the query once and run both assertions on the same frame
+    frame = Query(query_steps=[overlap]).build(pixels).compute()
+    assert frame.shape[0] == 4
+    assert sorted(frame["count"].tolist()) == [1, 2, 3, 4]
+
+
+@pytest.mark.parametrize(
+    "pixels_fixture",
+    [
+        "pixels_pandas",
+        "pixels_dask",
+        "pixels_duckdb",
+    ],
+)
+def test_all_anchor_regions_returns_correct_pixels_multi_region(
+    pixels_fixture, multi_region, request
+):
+    """Check the pixels selected by an ``ALL`` overlap with multiple regions.
+
+    Only a single row, carrying the count 2, is expected.
+    """
+    pixels = request.getfixturevalue(pixels_fixture)
+    overlap = Overlap(regions=multi_region, anchor_mode=Anchor(mode="ALL"))
+    # execute the query once and run both assertions on the same frame
+    frame = Query(query_steps=[overlap]).build(pixels).compute()
+    assert frame.shape[0] == 1
+    assert sorted(frame["count"].tolist()) == [2]
+
+
+@pytest.mark.parametrize(
+    "pixels_fixture",
+    [
+        "pixels_pandas",
+        "pixels_dask",
+        "pixels_duckdb",
+    ],
+)
+def test_pixels_duplicated_for_multiple_overlapping_regions(
+    pixels_fixture, multi_region_2, request
+):
+    """Check that a pixel is returned once per region it overlaps.
+
+    With the overlapping regions of ``multi_region_2`` the pixel with count 2
+    is expected twice in the result of an ``ALL`` overlap.
+    """
+    pixels = request.getfixturevalue(pixels_fixture)
+    overlap = Overlap(regions=multi_region_2, anchor_mode=Anchor(mode="ALL"))
+    # execute the query once and run both assertions on the same frame
+    frame = Query(query_steps=[overlap]).build(pixels).compute()
+    # the duplicated pixel shows up as two rows
+    assert frame.shape[0] == 2
+    # both rows carry the count of the same pixel
+    assert sorted(frame["count"].tolist()) == [2, 2]
+
+
+@pytest.mark.parametrize(
+    "pixels_fixture,anchors,expected_reads",
+    [
+        # every anchor selection is paired with every pixel backend
+        (source_data, anchors, expected_reads)
+        for anchors, expected_reads in [
+            ([1], [1, 2, 3]),
+            ([2], [2, 4]),
+            ([1, 2], [2]),
+        ]
+        for source_data in [
+            "pixels_pandas",
+            "pixels_dask",
+            "pixels_duckdb",
+        ]
+    ],
+)
+def test_specific_anchor_regions_returns_correct_pixels_multi_region(
+    pixels_fixture, anchors, expected_reads, multi_region, request
+):
+    """Check an ``ALL`` overlap with multiple regions and specific anchors.
+
+    ``expected_reads`` lists the counts of the pixels that must be returned.
+    """
+    pixels = request.getfixturevalue(pixels_fixture)
+    anchor_mode = Anchor(mode="ALL", anchors=anchors)
+    overlap = Overlap(regions=multi_region, anchor_mode=anchor_mode)
+    # execute the query once and run both assertions on the same frame
+    frame = Query(query_steps=[overlap]).build(pixels).compute()
+    assert frame.shape[0] == len(expected_reads)
+    assert sorted(frame["count"].tolist()) == sorted(expected_reads)
+
+
+# validation problems
+
+
+@pytest.mark.parametrize(
+    "pixels_fixture",
+    ["pixels_pandas", "pixels_dask", "pixels_duckdb"],
+)
+def test_specific_anchor_region_not_in_pixels_raises_error(
+    pixels_fixture, single_region, request
+):
+    """Check that an anchor the pixels do not have is rejected on build.
+
+    The pixel fixtures are created with ``number_fragments=2``, so anchor 3
+    presumably refers to a position they lack. The query is assembled outside
+    of the ``pytest.raises`` block so that only ``build`` can satisfy the test.
+    """
+    pixels = request.getfixturevalue(pixels_fixture)
+    anchor_mode = Anchor(mode="ALL", anchors=[3])
+    overlap = Overlap(regions=single_region, anchor_mode=anchor_mode)
+    # assembling the query is not expected to raise
+    query = Query(query_steps=[overlap])
+    with pytest.raises(ValueError):
+        query.build(pixels)
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 7ef394b..b0d9277 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1,17 +1,17 @@
"""Tests for CLI of spoc"""
# pylint: disable=redefined-outer-name
-
-import shutil
import os
-import pytest
-import pandas as pd
-from pandas.testing import assert_frame_equal
+import shutil
+
import numpy as np
+import pandas as pd
+import pytest
from click.testing import CliRunner
+from pandas.testing import assert_frame_equal
from spoc import cli
-from spoc.io import FileManager
from spoc.contacts import Contacts
+from spoc.io import FileManager
from spoc.models import dataframe_models
from spoc.pixels import Pixels
@@ -189,7 +189,9 @@ def test_merge_contacts_works_with_compatible_contacts(mergable_triplet_files):
# check number of files
assert len(FileManager().list_contacts(output_path)) == 1
# check content of file
- labelled_fragments = Contacts.from_uri(f"{output_path}::3", mode="pandas").data
+ labelled_fragments = Contacts.from_uri(
+ f"{output_path}::3", mode=dataframe_models.DataMode.PANDAS
+ ).data
assert len(labelled_fragments) == 8
first_half = labelled_fragments.iloc[:4, :].reset_index(drop=True)
second_half = labelled_fragments.iloc[4:, :].reset_index(drop=True)
@@ -219,5 +221,7 @@ def test_bin_contacts(good_triplet_file_for_pixels, expected_pixels):
output_path = "tmp/test_output5.parquet"
runner.invoke(cli.bin_contacts, [good_triplet_file_for_pixels, output_path])
# check content of file
- pixels = Pixels.from_uri(f"{output_path}::3::10000", mode="pandas")
+ pixels = Pixels.from_uri(
+ f"{output_path}::3::10000", mode=dataframe_models.DataMode.PANDAS
+ )
np.array_equal(pixels.data.values, expected_pixels.values)
diff --git a/tests/test_pixels.py b/tests/test_pixels.py
index e015a70..a38afd0 100644
--- a/tests/test_pixels.py
+++ b/tests/test_pixels.py
@@ -1,10 +1,10 @@
"""Tests for the pixels module"""
# pylint: disable=redefined-outer-name
-
-import pytest
import dask.dataframe as dd
-import pandas as pd
import numpy as np
+import pandas as pd
+import pytest
+
from spoc import pixels
from spoc.contacts import Contacts