Merge pull request #18 from AutoResearch/feat/pd-dataframe

Feat/pd dataframe
AutoResearch · Aug 18, 2023 · 660fe83 · 660fe83
2 parents 09514ce + 0079974
commit 660fe83
Show file tree

Hide file tree

Showing 19 changed files with 1,334 additions and 1,621 deletions.
diff --git a/docs/sampler/Basic Usage.ipynb → docs/Basic Usage.ipynb b/docs/sampler/Basic Usage.ipynb → docs/Basic Usage.ipynb
diff --git a/docs/sampler/index.md → docs/index.md b/docs/sampler/index.md → docs/index.md
@@ -1,4 +1,4 @@
-# Falsification Sampler
+# Falsification Experimentalist
 
 The falsification sampler identifies novel experimental conditions $X'$ under 
 which the loss $\hat{\mathcal{L}}(M,X,Y,X')$ of the best 
@@ -72,13 +72,18 @@ An example output of the falsification sampler is:
 
 The selected conditions are predicted to yield the highest error for the linear regression model. 
 
-### Example Code
+You may also use the falsification pooler to obtain novel experimental conditions from the range of values associated 
+with each independent variable. To prevent the falsification pooler from sampling at the limits of the domain ($0$ and $2\pi$),
+it can be provided with the optional parameter ``limit_repulsion``, which biases samples for new
+experimental conditions away from the boundaries of $X$, as shown in the second example below.
+
+### Example Code for Falsification Sampler
 ```python
 import numpy as np
 from sklearn.linear_model import LinearRegression
 from autora.variable import DV, IV, ValueType, VariableCollection
-from autora.experimentalist.sampler.falsification import falsification_sample
-from autora.experimentalist.sampler.falsification import falsification_score_sample
+from autora.experimentalist.falsification import falsification_sample
+from autora.experimentalist.falsification import falsification_score_sample
 
 # Specify X and Y
 X = np.linspace(0, 2 * np.pi, 100)
@@ -110,7 +115,7 @@ model.fit(X.reshape(-1, 1), Y)
 
 # Sample four novel conditions
 X_selected = falsification_sample(
-    condition_pool=X_prime,
+    conditions=X_prime,
     model=model,
     reference_conditions=X,
     reference_observations=Y,
@@ -121,23 +126,83 @@ X_selected = falsification_sample(
 # convert Iterable to numpy array
 X_selected = np.array(list(X_selected))
 
-print(X_selected)
-
 # We may also obtain samples along with their z-scored novelty scores
-X_selected, scores = falsification_score_sample(
-    condition_pool=X_prime,
+X_selected = falsification_score_sample(
+    conditions=X_prime,
     model=model,
     reference_conditions=X,
     reference_observations=Y,
     metadata=metadata,
     num_samples=4)
+
+print(X_selected)
 ```
 
 Output:
 ````
-[[0. ]
- [6.5]
- [6. ]
- [2. ]]
+    0     score
+0  6.5  2.676909
+1  0.0  1.812108
+2  4.5  0.138694
+3  2.0  0.137721
+````
+
+### Example Code for Falsification Pooler
+
+```python
+import numpy as np
+from sklearn.linear_model import LinearRegression
+from autora.variable import DV, IV, ValueType, VariableCollection
+from autora.experimentalist.falsification import falsification_pool
+
+# Specify X and Y
+X = np.linspace(0, 2 * np.pi, 100)
+Y = np.sin(X)
+
+# We need to provide the pooler with some metadata specifying the independent and dependent variables
+# Specify independent variable
+iv = IV(
+    name="x",
+    value_range=(0, 2 * np.pi),
+)
+
+# specify dependent variable
+dv = DV(
+    name="y",
+    type=ValueType.REAL,
+)
+
+# Variable collection with ivs and dvs
+metadata = VariableCollection(
+    independent_variables=[iv],
+    dependent_variables=[dv],
+)
+
+# Fit a linear regression to the data
+model = LinearRegression()
+model.fit(X.reshape(-1, 1), Y)
+
+# Sample four novel conditions
+X_sampled = falsification_pool(
+    model=model,
+    reference_conditions=X,
+    reference_observations=Y,
+    metadata=metadata,
+    num_samples=4,
+    limit_repulsion=0.01,
+)
+
+# convert Iterable to numpy array
+X_sampled = np.array(list(X_sampled))
+
+print(X_sampled)
+```
+
+Output:
+````
+[[6.28318531]
+ [2.16611028]
+ [2.16512322]
+ [2.17908978]]
 ````
 
diff --git a/docs/pooler/model-vs-data.png → docs/model-vs-data.png b/docs/pooler/model-vs-data.png → docs/model-vs-data.png
diff --git a/docs/pooler/mse.png → docs/mse.png b/docs/pooler/mse.png → docs/mse.png
diff --git a/docs/pooler/Basic Usage.ipynb b/docs/pooler/Basic Usage.ipynb
diff --git a/docs/pooler/index.md b/docs/pooler/index.md
diff --git a/docs/pooler/quickstart.md → docs/quickstart.md b/docs/pooler/quickstart.md → docs/quickstart.md
@@ -13,5 +13,5 @@ pip install -U autora["experimentalist-falsification"]
 
 Check your installation by running:
 ```shell
-python -c "from autora.experimentalist.pooler.falsification import falsification_pool"
+python -c "from autora.experimentalist.falsification import falsification_sample"
 ```
diff --git a/docs/sampler/model-vs-data.png b/docs/sampler/model-vs-data.png
diff --git a/docs/sampler/mse.png b/docs/sampler/mse.png
diff --git a/docs/sampler/quickstart.md b/docs/sampler/quickstart.md
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -5,13 +5,7 @@ site_name: AutoRA Falsification Experimentalist
 repo_url: 'https://github.com/AutoResearch/autora-experimentalist-falsification'
 
 nav:
-- Pooler:
-  - Home: 'pooler/index.md'
-  - Quickstart: 'pooler/quickstart.md'
-  - Examples:
-    - Basic Usage: 'pooler/Basic Usage.ipynb'
-- Sampler:
-  - Home: 'sampler/index.md'
-  - Quickstart: 'sampler/quickstart.md'
-  - Examples:
-    - Basic Usage: 'sampler/Basic Usage.ipynb'
+- Home: 'index.md'
+- Quickstart: 'quickstart.md'
+- Examples:
+  - Basic Usage: 'Basic Usage.ipynb'
diff --git a/pyproject.toml b/pyproject.toml
@@ -10,7 +10,8 @@ license = {file = "LICENSE"}
 # ADD NEW DEPENDENCIES HERE
 dependencies = [
     "autora-core>=3.1.0",
-    "torch"
+    "torch",
+    "pandas"
 ]
 
 [project.optional-dependencies]