Merge pull request #68 from rodekruis/ada-0.1

Ada 0.1
rodekruis · Feb 2, 2022 · a57f1b8 · a57f1b8
2 parents 200c4ae + 8ab811d
commit a57f1b8
Show file tree

Hide file tree

Showing 28 changed files with 2,200 additions and 86,764 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,91 +1,3 @@
-0.6.5 (2020-02-07)
-------------------
-- Remove accuracy threshold
-- Log all metrics
-- Set default metric to F1 macro
-- Add experiment results to repository
-- Save classification prediction probabilities to file
-- Add jupyter notebook for inspecting Caladrius datasets
-- Add dataset version argument to Sint-Maarten-2017 script
-
-0.6.4 (2019-12-30)
-------------------
-- Show table of models instead of dropdown
-- Visualize val performance
-- Create predict dataset set if possible
-- Create Caladrius Dataset using Digital Globe images for Sint Maarten
-
-0.6.3 (2019-12-22)
-------------------
-- evaluate random model
-- evaluate label average model
-
-0.6.2 (2019-11-30)
-------------------
-- use [bulma](https://bulma.io/) ui
-- refactor d3
-- add authentication
-- modularize the UI components
-- calculate model accuracy based on threshold
-- added terms and conditions
-
-0.6.1 (2019-11-22)
-------------------
-- Integrated formatters for Python ([Black](https://black.readthedocs.io/en/stable/) and [flake8](https://gitlab.com/pycqa/flake8)) and javascript/css/html/json ([Prettier](https://prettier.io/))
-- Enforced formatters using [husky](https://github.com/typicode/husky), [lint-staged](https://github.com/okonet/lint-staged) and [pre-commit](https://pre-commit.com/)
-- Fixed bugs in interface
-- Create/Download Report
-
-0.6.0 (2019-10-14)
-------------------
-- Added interface backend to access model and dataset
-- Interface allows switching models via dropdown
-- Removed builds from conda env file
-- Removed yarn dependency
-- Updated Docker image
-
-0.5.0 (2019-09-22)
-------------------
-- Added `accuracy_threshold` as input argument
-- Fixed batch size 1 bugs
-- Removed setup tools installation process
-- Increased verbosity of `sint_maarten_2017.py`
-- Switched to miniconda
-- Updated Docker image
-
-0.4.0 (2019-07-19)
-------------------
-- Refactored interface to use React components
-
-0.3.1 (2019-08-12)
-------------------
-- When creating the individual building images using `caladrius_data`,
-  now checks for overlap between different drone images and selects the
-  best option, discarding any with <90% good pixels
-
-0.3.0 (2019-06-06)
-------------------
-- Refactored `caladrius_data` entrypoint so that user must specify which
-  components of the data preparation should be run
-- Added an option to perform a reverse geocode query for building addresses
-
-0.2.1 (2019-04-09)
-------------------
-- Added administrative region information to the geojson file used for the visualization
-
-0.2.0 (2019-04-09)
-------------------
-- Made Caladrius an installable Python package
-- Restructured project and placed all Python package and interface files
-  in the `caladrius` directory
-- Created entrypoints `caladrius_data` for creating the dataset
-  and `caladrius` for running the model
-
-0.1.1 (2019-03-31)
-------------------
-- Added a `maxDataPoints` parameter to `run.py`, which limits the size of the
-  data sample. To be used primarily for debugging on non-production machines.
-
-0.1.0 (2019-03-22)
+0.1.0 (2021-03-04)
 ------------------
 - Initial version
diff --git a/Dockerfile b/Dockerfile
@@ -36,7 +36,7 @@ RUN curl -sL https://deb.nodesource.com/setup_10.x | bash &&\
 
 # Install Caladrius
 RUN /bin/bash caladrius_install.sh &&\
-    echo "source activate caladriusenv" >> ~/.bashrc
+    echo "source activate cal" >> ~/.bashrc
 
 # Make port 5000 available to the world outside this container
 EXPOSE 5000

diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-[![stable: 0.6.5](https://img.shields.io/badge/stable-0.6.5-ED2E26.svg?style=flat-square)](https://github.com/rodekruis/caladrius)
+[![stable: 0.6.6](https://img.shields.io/badge/stable-0.6.6-ED2E26.svg?style=flat-square)](https://github.com/rodekruis/caladrius)
 [![F.A.C.T.: 42](https://img.shields.io/badge/F\.A\.C\.T\.-42-291AE0.svg?style=flat-square)](https://rodekruis.sharepoint.com/sites/510-Team/_layouts/15/Doc.aspx?OR=teams&action=edit&sourcedoc={FD66FFCB-C34C-433E-9706-F672A8EFAB3D})
 [![code style: prettier](https://img.shields.io/badge/code_style-prettier-ff69b4.svg?style=flat-square)](https://github.com/prettier/prettier)
 [![code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?style=flat-square)](https://github.com/psf/black)

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.6.5
+0.1.0
diff --git a/caladrius/change_labels.py b/caladrius/change_labels.py
@@ -0,0 +1,133 @@
+import os
+import sys
+import argparse
+import logging
+import pandas as pd
+
+
+def binary_labels(
+    directory_path,
+    file_label_in,
+    file_label_out,
+    switch=False,
+    destroyed=False,
+    destroyed_switch=False,
+):
+    """Write a 0/1 version of the damage labels of every dataset split.
+
+    For each of the ``train``, ``validation`` and ``test`` sub-directories
+    of ``directory_path`` the space-separated ``<filename> <damage>`` file
+    ``file_label_in`` is read and written to ``file_label_out`` (same
+    directory, no header, no index) with the damage column binarized.
+    A split without an input label file is reported and skipped, in the
+    same way ``disaster_labels`` handles it.
+
+    The first flag that is set selects the rule:
+
+    * ``switch``:           1 if damage < 1, else 0
+    * ``destroyed``:        1 if damage > 2, else 0
+    * ``destroyed_switch``: 1 if damage < 3, else 0
+    * no flag:              1 if damage >= 1, else 0
+
+    Args:
+        directory_path: directory that contains the split sub-directories.
+        file_label_in: name of the label file to read in each split.
+        file_label_out: name of the label file to write in each split.
+        switch: use the inverted "any damage" rule.
+        destroyed: use the "destroyed" rule.
+        destroyed_switch: use the inverted "destroyed" rule.
+    """
+    for set_name in ["train", "validation", "test"]:
+        label_path = os.path.join(directory_path, set_name, file_label_in)
+        if not os.path.exists(label_path):
+            # Not every dataset ships all three splits.
+            print("No label file for {}".format(set_name))
+            continue
+
+        df = pd.read_csv(
+            label_path,
+            sep=" ",
+            header=None,
+            names=["filename", "damage"],
+            # Keep file names verbatim; type inference would turn an
+            # all-numeric name such as "001" into 1.
+            dtype={"filename": str},
+        )
+
+        if switch:
+            is_positive = df["damage"] < 1
+        elif destroyed:
+            is_positive = df["damage"] > 2
+        elif destroyed_switch:
+            is_positive = df["damage"] < 3
+        else:
+            is_positive = df["damage"] >= 1
+        df["damage"] = is_positive.astype(int)
+
+        df.to_csv(
+            os.path.join(directory_path, set_name, file_label_out),
+            sep=" ",
+            index=False,
+            header=False,
+        )
+
+
+def disaster_labels(disaster_names, directory_path, file_label_in, file_label_out):
+    """Keep only the label rows whose file name mentions a given disaster.
+
+    For each of the ``train``, ``validation`` and ``test`` sub-directories
+    of ``directory_path`` the space-separated ``<filename> <damage>`` file
+    ``file_label_in`` is read, and the rows whose file name contains at
+    least one of the requested names are written to ``file_label_out``
+    (same directory, no header, no index). A split without an input label
+    file is reported and skipped.
+
+    Args:
+        disaster_names: comma-delimited names, e.g. ``"typhoon,flood"``.
+            Names are matched as literal substrings; surrounding blanks
+            and empty entries (such as a trailing comma) are ignored.
+        directory_path: directory that contains the split sub-directories.
+        file_label_in: name of the label file to read in each split.
+        file_label_out: name of the label file to write in each split.
+
+    Raises:
+        ValueError: if ``disaster_names`` is ``None`` or holds no name.
+    """
+    # Raise explicitly: an ``assert`` disappears when Python runs with -O.
+    if disaster_names is None:
+        raise ValueError("disaster_names must be a comma-delimited string")
+
+    # Parse once, outside the loop. Empty entries are dropped because an
+    # empty name is contained in every file name and would select all rows.
+    names = [name.strip() for name in disaster_names.split(",")]
+    names = [name for name in names if name]
+    if not names:
+        raise ValueError("disaster_names does not contain any disaster name")
+
+    for set_name in ["train", "validation", "test"]:
+        label_path = os.path.join(directory_path, set_name, file_label_in)
+        if not os.path.exists(label_path):
+            print("No label file for {}".format(set_name))
+            continue
+
+        df = pd.read_csv(
+            label_path,
+            sep=" ",
+            header=None,
+            names=["filename", "damage"],
+            # The ``.str`` accessor below needs string file names.
+            dtype={"filename": str},
+        )
+
+        # Literal (non-regex) matching, so characters such as "." or "("
+        # in a disaster name are not interpreted as regex syntax.
+        selected = pd.Series(False, index=df.index)
+        for name in names:
+            selected |= df["filename"].str.contains(name, regex=False, na=False)
+
+        df[selected].to_csv(
+            os.path.join(directory_path, set_name, file_label_out),
+            sep=" ",
+            index=False,
+            header=False,
+        )
+
+
+def main(argv=None):
+    """Parse the command-line options and write the requested label files.
+
+    ``--label-type`` selects the conversion: the ``binary*`` types call
+    ``binary_labels`` with the matching flag and ``disaster`` calls
+    ``disaster_labels``. ``regression`` and ``regression_noise`` are
+    accepted for compatibility, but no conversion exists for them here, so
+    a notice is printed and nothing is written.
+
+    Args:
+        argv: list of command-line arguments to parse. ``None`` (the
+            default) makes argparse read ``sys.argv[1:]``.
+    """
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument(
+        "--data-path",
+        # Required: the former ``default=False`` only surfaced later as a
+        # TypeError inside ``os.path.join``.
+        required=True,
+        type=str,
+        metavar="data_path",
+        help="Path where buildings are saved",
+    )
+    parser.add_argument(
+        "--file-in",
+        default="labels.txt",
+        type=str,
+        metavar="file_in",
+        help="name of file with original labels",
+    )
+
+    parser.add_argument(
+        "--file-out",
+        # Required: there is no sensible default output name, and ``None``
+        # cannot be joined into a path.
+        required=True,
+        type=str,
+        metavar="file_out",
+        help="name of file with output labels",
+    )
+
+    parser.add_argument(
+        "--label-type",
+        default="binary",
+        type=str,
+        metavar="label_type",
+        choices=[
+            "binary",
+            "regression",
+            "regression_noise",
+            "disaster",
+            "binary_switch",
+            "binary_des",
+            "binary_des_switch",
+        ],
+        help="type of output labels",
+    )
+
+    parser.add_argument(
+        "--disaster-names",
+        default=None,
+        type=str,
+        metavar="disaster_names",
+        help="List of disasters to be included, as a delimited string. E.g. typhoon,flood This can be types or specific occurences, as long as the building filenames contain these names.",
+    )
+
+    args = parser.parse_args(argv)
+
+    # Keyword flag that each binary label type passes to binary_labels.
+    binary_flags = {
+        "binary": {},
+        "binary_switch": {"switch": True},
+        "binary_des": {"destroyed": True},
+        "binary_des_switch": {"destroyed_switch": True},
+    }
+
+    if args.label_type in binary_flags:
+        binary_labels(
+            args.data_path,
+            args.file_in,
+            args.file_out,
+            **binary_flags[args.label_type]
+        )
+    elif args.label_type == "disaster":
+        if args.disaster_names is None:
+            # Report a usage error instead of failing inside disaster_labels.
+            parser.error("--disaster-names is required when --label-type is disaster")
+        disaster_labels(
+            args.disaster_names, args.data_path, args.file_in, args.file_out,
+        )
+    else:
+        # "regression" / "regression_noise": say so instead of exiting
+        # silently without producing the requested file.
+        print(
+            "Label type {} is not implemented, no label file written".format(
+                args.label_type
+            )
+        )
+
+
+# Run the command-line interface only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()