diff --git a/.quarto/crossref/about.qmd/about.html.json b/.quarto/crossref/about.qmd/about.html.json index 208374b..7df77d5 100644 --- a/.quarto/crossref/about.qmd/about.html.json +++ b/.quarto/crossref/about.qmd/about.html.json @@ -1 +1 @@ -{"entries":[],"headings":[]} \ No newline at end of file +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/crossref/about_me.qmd/about_me.html.json b/.quarto/crossref/about_me.qmd/about_me.html.json index 208374b..7df77d5 100644 --- a/.quarto/crossref/about_me.qmd/about_me.html.json +++ b/.quarto/crossref/about_me.qmd/about_me.html.json @@ -1 +1 @@ -{"entries":[],"headings":[]} \ No newline at end of file +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/crossref/index.qmd/index.html.json b/.quarto/crossref/index.qmd/index.html.json index cd6ee17..a9cba2c 100644 --- a/.quarto/crossref/index.qmd/index.html.json +++ b/.quarto/crossref/index.qmd/index.html.json @@ -1 +1 @@ -{"headings":["welcome-to-rtichoke-blog"],"entries":[]} \ No newline at end of file +{"entries":[],"headings":["welcome-to-rtichoke-blog"]} \ No newline at end of file diff --git a/.quarto/idx/about.qmd.json b/.quarto/idx/about.qmd.json deleted file mode 100644 index 2d90d04..0000000 --- a/.quarto/idx/about.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"About","markdown":{"yaml":{"title":"About","image":"profile.jpg","about":{"template":"jolla","links":[{"icon":"twitter","text":"Twitter","href":"https://twitter.com"},{"icon":"linkedin","text":"LinkedIn","href":"https://linkedin.com"},{"icon":"github","text":"Github","href":"https://github.com"}]}},"containsRefs":false,"markdown":"\n\nAbout this blog\n"},"formats":{"html":{"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-yaml":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[]},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","output-file":"about.html"},"language":{},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.0.38","editor":"visual","theme":"theme.scss","title":"About","image":"profile.jpg","about":{"template":"jolla","links":[{"icon":"twitter","text":"Twitter","href":"https://twitter.com"},{"icon":"linkedin","text":"LinkedIn","href":"https://linkedin.com"},{"icon":"github","text":"Github","href":"https://github.com"}]}},"extensions":{"book":{"multiFile":true}}}}} \ No newline at end of file diff --git a/.quarto/idx/about_me.qmd.json b/.quarto/idx/about_me.qmd.json index b25287c..7b378d6 100644 --- a/.quarto/idx/about_me.qmd.json +++ b/.quarto/idx/about_me.qmd.json @@ -1 +1 @@ -{"title":"Uriah Finkel","markdown":{"yaml":{"title":"Uriah Finkel","about":{"template":"jolla","image":"avatar.jpg","links":[{"icon":"github","text":"Github","href":"https://github.com/uriahf/rtichoke"},{"icon":"twitter","text":"twitter","href":"https://twitter.com/finkeluriah"},{"icon":"facebook","href":"https://www.facebook.com/groups/rforthemasses","text":"R for the Masses"},{"icon":"linkedin","href":"https://www.linkedin.com/in/uriah-finkel","text":"Linkedin"}]},"format":{"html":{"resources":["audio.mp4"]}}},"containsRefs":false,"markdown":"\n\n
\n\n
How to Say My Name
\n\n\n\n
\n\nAuthor of [rtichoke](https://uriahf.github.io/rtichoke/)\n\nData Scientist in [Clalit Innovation](https://www.clalit-innovation.org/)\n\nCo-Admin of Israeli R community [\"R for the Masses\"](https://www.facebook.com/groups/1655238354489398/)\n"},"formats":{"html":{"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-yaml":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[]},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","output-file":"about_me.html"},"language":{},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.0.38","editor":"visual","theme":"theme.scss","title":"Uriah Finkel","about":{"template":"jolla","image":"avatar.jpg","links":[{"icon":"github","text":"Github","href":"https://github.com/uriahf/rtichoke"},{"icon":"twitter","text":"twitter","href":"https://twitter.com/finkeluriah"},{"icon":"facebook","href":"https://www.facebook.com/groups/rforthemasses","text":"R for the Masses"},{"icon":"linkedin","href":"https://www.linkedin.com/in/uriah-finkel","text":"Linkedin"}]},"resources":["audio.mp4"]},"extensions":{"book":{"multiFile":true}}}}} \ No newline at end of file +{"title":"Uriah Finkel","markdown":{"yaml":{"title":"Uriah Finkel","about":{"template":"jolla","image":"avatar.jpg","links":[{"icon":"github","text":"Github","href":"https://github.com/uriahf/rtichoke"},{"icon":"twitter","text":"twitter","href":"https://twitter.com/finkeluriah"},{"icon":"facebook","href":"https://www.facebook.com/groups/rforthemasses","text":"R for the Masses"},{"icon":"linkedin","href":"https://www.linkedin.com/in/uriah-finkel","text":"Linkedin"}]}},"containsRefs":false,"markdown":"\n\nAuthor of [rtichoke](https://uriahf.github.io/rtichoke/)\n\nData Scientist in [Clalit Innovation](https://www.clalit-innovation.org/)\n\nCo-Admin of Israeli R community [\"R for the Masses\"](https://www.facebook.com/groups/1655238354489398/)\n"},"formats":{"html":{"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-yaml":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[]},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","output-file":"about_me.html"},"language":{},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.0.38","editor":"visual","theme":"theme.scss","title":"Uriah Finkel","about":{"template":"jolla","image":"avatar.jpg","links":[{"icon":"github","text":"Github","href":"https://github.com/uriahf/rtichoke"},{"icon":"twitter","text":"twitter","href":"https://twitter.com/finkeluriah"},{"icon":"facebook","href":"https://www.facebook.com/groups/rforthemasses","text":"R for the Masses"},{"icon":"linkedin","href":"https://www.linkedin.com/in/uriah-finkel","text":"Linkedin"}]}},"extensions":{"book":{"multiFile":true}}}}} \ No newline at end of file diff --git a/.quarto/preview/lock b/.quarto/preview/lock index 47fc26d..c747e45 100644 --- a/.quarto/preview/lock +++ b/.quarto/preview/lock @@ -1 +1 @@ -14252 \ No newline at end of file +9972 \ No newline at end of file diff --git a/_freeze/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index/execute-results/html.json b/_freeze/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index/execute-results/html.json index 3c6b752..a003dfe 100644 --- a/_freeze/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index/execute-results/html.json +++ b/_freeze/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index/execute-results/html.json @@ -1,7 +1,7 @@ { - "hash": "9f670993ef276914c2560a912f6d5edd", + "hash": "0602a8ade15e74e8cbf90de0f1a5ac2d", "result": { - "markdown": "---\ntitle: \"DCA for Quantifying the Additional Benefit of a New Marker by Emily Vertosick and Andrew Vickers\"\ndate: \"2022-10-02\"\ncategories: \n - Replications\n - Decision\n - Emily Vertosick\n - Andrew Vickers\n - gt\n - gtsummary\n - dcurves\n - rms\n - Hmisc\nimage: \"image.jpg\"\ndraft: true\n---\n\n\n\n\n## Additional Benefit of a New Marker\n\nPrediction Model might gain accuracy if you'll add more relevant features to existing models, but many times it's not obvious what is the additional value of additional feature and how to quantify it in terms of Decision Making. The post [Decision curve analysis for quantifying the additional benefit of a new marker](https://www.fharrell.com/post/addmarkerdca) by Emily Vertosick and Andrew Vickers show a simple example (the code presented here is almost identical to the original code presented in the link).\n\n## Preparing the Data\n\n### Loading the Data with Hmisc\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(Hmisc)\nlibrary(dplyr)\nlibrary(tibble)\n\ngetHdata(acath)\nacath <- subset(acath, !is.na(choleste))\n```\n:::\n\n\n### Fitting Logistic Regressions with rms\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(rms)\n\npre <- lrm(sigdz ~ rcs(age,4) * sex, data = acath)\npre_pred <- predict(pre, type='fitted')\n\npost <- lrm(sigdz ~ rcs(age,4) * sex + \n rcs(choleste,4) + rcs(age,4) %ia% rcs(choleste,4), data = acath)\npost_pred <- predict(post, type='fitted')\n\nacath_pred <- bind_cols(\n acath,\n pre_pred %>% enframe(name = NULL, value = \"pre\"),\n post_pred %>% enframe(name = NULL, value = \"post\")\n )\n```\n:::\n\n\n## Conventional Decision Curve\n\n::: panel-tabset\n### dcurves\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(dcurves)\n\ndca_prepost <- dca(\n sigdz ~ pre + post,\n data = acath_pred,\n label = list(\n pre = \"Age and Sex\",\n post = \"Age, Sex and Cholesterol\"))\n\ndca_prepost %>%\n plot(smooth = TRUE) + \n theme_classic() +\n theme(legend.position = \"none\")\n```\n:::\n\n\n![](./conventional_decision.svg)\n\n\n\n\n:::\n\n### rtichoke\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(rtichoke)\nlibrary(plotly)\n\nperformance_data_dc <- \n prepare_performance_data(\n probs = list(\n \"Age and Sex\" = \n acath_pred$pre,\n \"Age, Sex and Cholesterol\" = \n acath_pred$post\n ),\n reals = list(acath_pred$sigdz)\n)\n\nperformance_data_dc %>%\n plot_decision_curve(\n col_values = \n c(\"#00BFC4\", \"#C77CFF\"),\n size = 350\n ) %>%\n layout(\n yaxis = list(\n range =\n c(-0.07, 0.7)\n )\n )\n```\n:::\n\n::: {.cell}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n\n:::\n:::\n\n## Specific Range of Probability Thresholds\n\n::: panel-tabset\n### dcurves\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(dcurves)\n\ndca_prepost_15_35 <- dca(\n sigdz ~ pre + post,\n data = acath_pred,\n thresholds = seq(0.15, 0.35, by = 0.05),\n label = list(\n pre = \"Age and Sex\",\n post = \"Age, Sex and Cholesterol\")) %>%\n plot(type = 'net_benefit', \n smooth = FALSE, \n show_ggplot_code = FALSE)\n\ndca_prepost_15_35 + \n theme_classic() + \n theme(legend.position = \"none\")\n```\n:::\n\n\n\n\n![](conventional_decision_15_35.svg)\n:::\n\n### rtichoke\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\nperformance_data_dc %>% \n rtichoke::plot_decision_curve(\n col_values = c(\"#00BFC4\", \"#C77CFF\"),\n min_p_threshold = 0.15, \n max_p_threshold = 0.35,\n size = 350\n ) %>% \n plotly::layout(\n yaxis = list(range =\n c(-0.07, 0.7))\n ) \n```\n:::\n\n::: {.cell}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n\n:::\n:::\n\n## Interventions Avoided\n\n::: panel-tabset\n### dcurves\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\n# code\n```\n:::\n\n\n![](./interventions_avoided.svg)\n:::\n\n### rtichoke\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\nperformance_data_dc %>%\n rtichoke::plot_decision_curve(\n col_values = c(\"#F8766D\", \"#00BFC4\"),\n type = \"interventions avoided\",\n size = 350\n ) %>%\n plotly::layout(\n yaxis = list(range =\n c(-10, 100))\n )\n```\n:::\n\n::: {.cell}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n\n:::\n:::\n\n## Conventional and Interventions Avoided Combined (rtichoke code)\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\nperformance_data_dc %>%\n plot_decision_curve(\n col_values = \n c(\"#00BFC4\", \"#C77CFF\"),\n type = \"combined\",\n size = 500\n )\n```\n:::\n\n::: {.cell}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n\n:::\n", + "markdown": "---\ntitle: \"DCA for Quantifying the Additional Benefit of a New Marker by Emily Vertosick and Andrew Vickers\"\ndate: \"2022-10-02\"\ncategories: \n - Replications\n - Decision\n - Emily Vertosick\n - Andrew Vickers\n - gt\n - gtsummary\n - dcurves\n - rms\n - Hmisc\nimage: \"image.jpg\"\ndraft: false\n---\n\n\n\n\n## Additional Benefit of a New Marker\n\nPrediction Model might gain accuracy if you'll add more relevant features to existing models, but many times it's not obvious what is the additional value of additional feature and how to quantify it in terms of Decision Making. The post [Decision curve analysis for quantifying the additional benefit of a new marker](https://www.fharrell.com/post/addmarkerdca) by Emily Vertosick and Andrew Vickers show a simple example (the code presented here is almost identical to the original code presented in the link).\n\n## Preparing the Data\n\n### Loading the Data with Hmisc\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(Hmisc)\nlibrary(dplyr)\nlibrary(tibble)\n\ngetHdata(acath)\nacath <- subset(acath, !is.na(choleste))\n```\n:::\n\n\n### Fitting Logistic Regressions with rms\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(rms)\n\npre <- lrm(sigdz ~ rcs(age,4) * sex, data = acath)\npre_pred <- predict(pre, type='fitted')\n\npost <- lrm(sigdz ~ rcs(age,4) * sex + \n rcs(choleste,4) + rcs(age,4) %ia% rcs(choleste,4), data = acath)\npost_pred <- predict(post, type='fitted')\n\nacath_pred <- bind_cols(\n acath,\n pre_pred %>% enframe(name = NULL, value = \"pre\"),\n post_pred %>% enframe(name = NULL, value = \"post\")\n )\n```\n:::\n\n\n## Conventional Decision Curve\n\n::: panel-tabset\n### dcurves\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(dcurves)\n\ndca_prepost <- dca(\n sigdz ~ pre + post,\n data = acath_pred,\n label = list(\n pre = \"Age and Sex\",\n post = \"Age, Sex and Cholesterol\"))\n\ndca_prepost %>%\n plot(smooth = TRUE) + \n theme_classic() +\n theme(legend.position = \"none\")\n```\n:::\n\n\n![](./conventional_decision.svg)\n\n\n\n\n:::\n\n### rtichoke\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(rtichoke)\nlibrary(plotly)\n\nperformance_data_dc <- \n prepare_performance_data(\n probs = list(\n \"Age and Sex\" = \n acath_pred$pre,\n \"Age, Sex and Cholesterol\" = \n acath_pred$post\n ),\n reals = list(acath_pred$sigdz)\n)\n\nperformance_data_dc %>%\n plot_decision_curve(\n col_values = \n c(\"#00BFC4\", \"#C77CFF\"),\n size = 350\n ) %>%\n plotly::layout(\n yaxis = list(\n range =\n c(-0.07, 0.7)\n )\n )\n```\n:::\n\n::: {.cell}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n\n:::\n:::\n\n## Specific Range of Probability Thresholds\n\n::: panel-tabset\n### dcurves\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(dcurves)\n\ndca_prepost_15_35 <- dca(\n sigdz ~ pre + post,\n data = acath_pred,\n thresholds = seq(0.15, 0.35, by = 0.05),\n label = list(\n pre = \"Age and Sex\",\n post = \"Age, Sex and Cholesterol\")) %>%\n plot(type = 'net_benefit', \n smooth = FALSE, \n show_ggplot_code = FALSE)\n\ndca_prepost_15_35 + \n theme_classic() + \n theme(legend.position = \"none\")\n```\n:::\n\n\n\n\n![](conventional_decision_15_35.svg)\n:::\n\n### rtichoke\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\nperformance_data_dc %>% \n plot_decision_curve(\n col_values = c(\"#00BFC4\", \"#C77CFF\"),\n min_p_threshold = 0.15, \n max_p_threshold = 0.35,\n size = 350\n ) %>% \n plotly::layout(\n yaxis = list(range =\n c(-0.07, 0.7))\n ) \n```\n:::\n\n::: {.cell}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n\n:::\n:::\n\n## Interventions Avoided\n\n::: panel-tabset\n### dcurves\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\ndca_prepost %>%\n net_intervention_avoided() %>% \n plot(type = 'net_intervention_avoided', \n smooth = FALSE) + \n theme_classic() +\n theme(legend.position = \"none\")\n```\n:::\n\n\n![](./interventions_avoided.svg)\n:::\n\n### rtichoke\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\nperformance_data_dc %>%\n plot_decision_curve(\n col_values = c(\"#F8766D\", \"#00BFC4\"),\n type = \"interventions avoided\",\n size = 350\n ) %>%\n plotly::layout(\n yaxis = list(range =\n c(-10, 100))\n )\n```\n:::\n\n::: {.cell}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n\n:::\n:::\n\n## Conventional and Interventions Avoided Combined (rtichoke code)\n\n::: {layout-ncol=\"2\"}\n\n::: {.cell}\n\n```{.r .cell-code}\nperformance_data_dc %>%\n plot_decision_curve(\n col_values = \n c(\"#00BFC4\", \"#C77CFF\"),\n type = \"combined\",\n size = 500\n )\n```\n:::\n\n::: {.cell}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n\n:::\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_site/_site/_site/_site/_site/_site/_site/_site/_site/audio.mp4 b/_site/_site/_site/_site/_site/_site/_site/_site/_site/audio.mp4 deleted file mode 100644 index c5295c4..0000000 Binary files a/_site/_site/_site/_site/_site/_site/_site/_site/_site/audio.mp4 and /dev/null differ diff --git a/_site/_site/_site/_site/_site/_site/_site/_site/audio.mp4 b/_site/_site/_site/_site/_site/_site/_site/_site/audio.mp4 deleted file mode 100644 index c5295c4..0000000 Binary files a/_site/_site/_site/_site/_site/_site/_site/_site/audio.mp4 and /dev/null differ diff --git a/_site/_site/_site/_site/_site/_site/_site/audio.mp4 b/_site/_site/_site/_site/_site/_site/_site/audio.mp4 deleted file mode 100644 index c5295c4..0000000 Binary files a/_site/_site/_site/_site/_site/_site/_site/audio.mp4 and /dev/null differ diff --git a/_site/_site/_site/_site/_site/_site/audio.mp4 b/_site/_site/_site/_site/_site/_site/audio.mp4 deleted file mode 100644 index c5295c4..0000000 Binary files a/_site/_site/_site/_site/_site/_site/audio.mp4 and /dev/null differ diff --git a/_site/_site/_site/_site/_site/audio.mp4 b/_site/_site/_site/_site/_site/audio.mp4 deleted file mode 100644 index c5295c4..0000000 Binary files a/_site/_site/_site/_site/_site/audio.mp4 and /dev/null differ diff --git a/_site/_site/_site/_site/audio.mp4 b/_site/_site/_site/_site/audio.mp4 deleted file mode 100644 index c5295c4..0000000 Binary files a/_site/_site/_site/_site/audio.mp4 and /dev/null differ diff --git a/_site/_site/_site/audio.mp4 b/_site/_site/_site/audio.mp4 deleted file mode 100644 index c5295c4..0000000 Binary files a/_site/_site/_site/audio.mp4 and /dev/null differ diff --git a/_site/_site/audio.mp4 b/_site/_site/audio.mp4 deleted file mode 100644 index c5295c4..0000000 Binary files a/_site/_site/audio.mp4 and /dev/null differ diff --git a/_site/about.html b/_site/about.html deleted file mode 100644 index 0fdf2a4..0000000 --- a/_site/about.html +++ /dev/null @@ -1,236 +0,0 @@ - - - - - - - - - -rtichoke blog - About - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
- - - - -
- -
-
-

About

-
-
-
-
-

About this blog

- - -
-
- -
- - -
- - - - \ No newline at end of file diff --git a/_site/about_me.html b/_site/about_me.html index 73c566b..dd5397b 100644 --- a/_site/about_me.html +++ b/_site/about_me.html @@ -118,14 +118,6 @@

Uriah Finkel

-
-
-How to Say My Name -
- -

Author of rtichoke

Data Scientist in Clalit Innovation

Co-Admin of Israeli R community “R for the Masses”

diff --git a/_site/audio.mp4 b/_site/audio.mp4 deleted file mode 100644 index c5295c4..0000000 Binary files a/_site/audio.mp4 and /dev/null differ diff --git a/_site/blog.html b/_site/blog.html deleted file mode 100644 index 6198504..0000000 --- a/_site/blog.html +++ /dev/null @@ -1,320 +0,0 @@ - - - - - - - - - -rtichoke blog - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
-
-
-

rtichoke blog

-
-
- - -
- - - -
- - -
- - - - \ No newline at end of file diff --git a/_site/gaston_rtichoke_resized.png b/_site/gaston_rtichoke_resized.png deleted file mode 100644 index b13629f..0000000 Binary files a/_site/gaston_rtichoke_resized.png and /dev/null differ diff --git a/_site/listings.json b/_site/listings.json index 4fb2c32..62fd2b1 100644 --- a/_site/listings.json +++ b/_site/listings.json @@ -1,21 +1,8 @@ [ - { - "listing": "/index.html", - "items": [ - "/posts/2022-08-01-precision-recall/index.html", - "/posts/2022-06-06-cox-box-transformation/index.html" - ] - }, - { - "listing": "/blog.html", - "items": [ - "/posts/2022-08-01-precision-recall/index.html", - "/posts/2022-06-06-cox-box-transformation/index.html" - ] - }, { "listing": "/posts.html", "items": [ + "/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html", "/posts/2022-08-21-cox-box-transformation/index.html" ] } diff --git a/_site/posts.html b/_site/posts.html index 7475c18..8aa110a 100644 --- a/_site/posts.html +++ b/_site/posts.html @@ -156,7 +156,7 @@

Posts

+
Categories
All (2)
Andrew Vickers (1)
Decision (1)
Emily Vertosick (1)
Feature Engineering by Max Kuhn and Kjell Johnson (1)
Hmisc (1)
Kjell Johnson (1)
Max Kuhn (1)
ROC (1)
Replications (2)
caret (1)
dcurves (1)
gt (1)
gtsummary (1)
rms (1)
@@ -169,7 +169,63 @@
Categories
-
+
+
+

+
+ + +
+
diff --git a/_site/posts/2022-04-05-From-Haifa-University-to-Clalit Innovation/image.png b/_site/posts/2022-04-05-From-Haifa-University-to-Clalit Innovation/image.png deleted file mode 100644 index 96cc9ca..0000000 Binary files a/_site/posts/2022-04-05-From-Haifa-University-to-Clalit Innovation/image.png and /dev/null differ diff --git a/_site/posts/2022-04-05-From-Haifa-University-to-Clalit Innovation/index.html b/_site/posts/2022-04-05-From-Haifa-University-to-Clalit Innovation/index.html deleted file mode 100644 index 212ccf8..0000000 --- a/_site/posts/2022-04-05-From-Haifa-University-to-Clalit Innovation/index.html +++ /dev/null @@ -1,253 +0,0 @@ - - - - - - - - - - -rtichoke blog - From Haifa University to Clalit innovation - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
-
-
-

From Haifa University to Clalit innovation

-
-
ROC
-
Decision
-
-
-
- - -
- - -
-
Published
-
-

April 5, 2022

-
-
- -
- - -
- - - - -
- - - - -
-

Section

- - -
- -
- - -
- - - - \ No newline at end of file diff --git a/_site/posts/2022-06-06-cox-box-transformation/a.html b/_site/posts/2022-06-06-cox-box-transformation/a.html deleted file mode 100644 index 5143ffc..0000000 --- a/_site/posts/2022-06-06-cox-box-transformation/a.html +++ /dev/null @@ -1,296 +0,0 @@ - - - - - - - - - - - -rtichoke blog - Post With Code - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
-
-
-

Post With Code

-
-
news
-
code
-
analysis
-
-
-
- - -
- -
-
Author
-
-

Harlow Malloc

-
-
- -
-
Published
-
-

July 25, 2022

-
-
- -
- - -
- - - - -
- - - - -

This is a post with executable code.

-
-
1 + 1
-
-
[1] 2
-
-
- - - -
- -
- - - - \ No newline at end of file diff --git a/_site/posts/2022-06-06-cox-box-transformation/image.PNG b/_site/posts/2022-06-06-cox-box-transformation/image.PNG deleted file mode 100644 index 3daab7a..0000000 Binary files a/_site/posts/2022-06-06-cox-box-transformation/image.PNG and /dev/null differ diff --git a/_site/posts/2022-06-06-cox-box-transformation/image.jpg b/_site/posts/2022-06-06-cox-box-transformation/image.jpg deleted file mode 100644 index 3ec04c8..0000000 Binary files a/_site/posts/2022-06-06-cox-box-transformation/image.jpg and /dev/null differ diff --git a/_site/posts/2022-06-06-cox-box-transformation/index.html b/_site/posts/2022-06-06-cox-box-transformation/index.html deleted file mode 100644 index ef0d371..0000000 --- a/_site/posts/2022-06-06-cox-box-transformation/index.html +++ /dev/null @@ -1,503 +0,0 @@ - - - - - - - - - - -rtichoke blog - Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
-
-
-

Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson

-
-
Replications
-
ROC
-
Max Kuhn
-
Kjell Johnson
-
Feature Engineering by Max Kuhn and Kjell Johnson
-
caret
-
-
-
- - -
- - -
-
Published
-
-

June 6, 2012

-
-
- -
- - -
- - - - -
- - - - -
-

Welcome to rtichoke blog!

-

This blog will be dedicated to the {rtichoke} package, which means that it will contain posts that are related to performance metrics and the possible related usability of {rtichoke}.

-
-
-

Replications

-

To make the package easier to use I plan to reproduce other people’s code with rtichoke, posts of this kind will be available under the category “replications”.

-

My first choice is to replicate the first example from the book “Feature Engineering and Selection: A Practical Approach for Predictive Models by Max Kuhn and Kjell Johnson”.

-

In this example you can see how Box-Cox transformation improves the discrimination capability of the logistic regression model without using any additional information.

-
-
-

Original Code

-

The code is almost identical to the original code that can be found on github.

-
-

Preparing the Data

-
-
library(caret)
-library(tidymodels)
-library(ggplot2)
-
-
-data(segmentationData)
-
-segmentationData <- 
-  segmentationData %>% 
-  dplyr::select(EqSphereAreaCh1, PerimCh1, Class, Case) %>% 
-  setNames(c("PredictorA", "PredictorB", "Class", "Case")) %>% 
-  mutate(Class = factor(ifelse(Class == "PS", "One", "Two")))
-
-example_train <- 
-  segmentationData %>% 
-  dplyr::filter(Case == "Train") %>% 
-  dplyr::select(-Case)
-
-example_test  <- 
-  segmentationData %>% 
-  dplyr::filter(Case == "Test") %>% 
-  dplyr::select(-Case)
-
-
-
-

Training the Models

-
-
example_ctrl <- 
-  trainControl(method = "none",
-               classProbs = TRUE,
-               summaryFunction = twoClassSummary)
-
-natural_terms <- train(Class ~ PredictorA + PredictorB,
-                       data = example_train,
-                       method = "glm",
-                       metric = "ROC",
-                       trControl = example_ctrl)
-
-trans_terms <- train(Class ~ PredictorA + PredictorB,
-                     data = example_train,
-                     method = "glm",
-                     preProc = "BoxCox",
-                     metric = "ROC",
-                     trControl = example_ctrl)
-
-
-
-

Creating Predictions

-
-
original_probs <- predict(natural_terms, example_test, type = "prob")[,1]
-
-transformed_probs <- predict(trans_terms, example_test, type = "prob")[,1]
-
-outcomes <- example_test$Class == "One"
-
-
-
-

Creating ROC Curve with yardstick

-
-
natural_dat <-
-  example_test %>% 
-  mutate(
-    prob = original_probs) %>% 
-  roc_curve(Class, prob) %>% 
-  mutate(Format = "Natural Units")
-
-trans_dat <-
-  example_test %>% 
-  mutate(
-    prob = transformed_probs) %>% 
-  roc_curve(Class, prob) %>% 
-  mutate(Format = "Inverse Units") 
-
-both_dat <- 
-  bind_rows(natural_dat, trans_dat) %>%
-  mutate(
-    Format = factor(Format, levels = c("Natural Units", "Inverse Units")))
-
-trans_roc_plot <- 
-  ggplot(both_dat) +
-  geom_step(aes(x = 1 - specificity, y = sensitivity, color = Format)) + 
-  coord_equal() + 
-  xlab("False Positive Rate") + 
-  ylab("True Positive Rate") + 
-  theme(legend.position = c(.8, .2)) + 
-  scale_colour_manual(
-    values = c("Natural Units" = "grey", 
-               "Inverse Units" = "black")) + 
-  geom_abline(intercept = 0, slope = 1, col = "grey", lty = 2) +
-  theme_classic()
-
-trans_roc_plot
-
-

-
-
-
-

rtichoke code

-
-

Creating ROC Curve with rtichoke

-
- -
-
-
-
-
-
library(rtichoke)
-
-create_roc_curve(
-  probs = list(
-    "Natural Units" = original_probs,
-    "Inverse Units" = transformed_probs
-  ),
-  reals = list(
-    outcomes
-  ),
-  size = 350, 
-  col_values = c("grey", "black")
-) 
-
-
-
-
- -
-
-
-
-
-
-
-
-
-
library(rtichoke)
-
-create_roc_curve(
-  probs = list(
-    "Natural Units" = original_probs,
-    "Inverse Units" = transformed_probs
-  ),
-  reals = list(
-    outcomes
-  ),
-  stratified_by = "ppcr",
-  size = 350, 
-  col_values = c("grey", "black")
-) 
-
-
-
-
- -
-
-
-
-
-
-
- - -
-
- -
- - -
- - - - \ No newline at end of file diff --git a/_site/posts/2022-06-06-cox-box-transformation/index.knit.html b/_site/posts/2022-06-06-cox-box-transformation/index.knit.html deleted file mode 100644 index 3996012..0000000 --- a/_site/posts/2022-06-06-cox-box-transformation/index.knit.html +++ /dev/null @@ -1,463 +0,0 @@ - - - - - - - - - - -rtichoke blog - Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
-
-
-

Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson

-
-
Replications
-
ROC
-
-
-
- - -
- - -
-
Published
-
-

June 6, 2012

-
-
- -
- - -
- - - - -
- - - - -

If there are repeated groups on the same page, their tabs are synced:

-
-

Welcome to rtichoke blog!

-

This blog will be dedicated to the {rtichoke} package, which means that it will contain posts that are related to performance metrics and the possible related usability of {rtichoke}.

-
-
-

Replications

-

To make the package easier to use I plan to reproduce other people’s code with rtichoke, posts of this kind will be available under the category “replications”.

-

My first choice is to replicate the first example from the book “Feature Engineering and Selection: A Practical Approach for Predictive Models by Max Kuhn and Kjell Johnson”.

-

In this example you can see how Box-Cox transformation improves the discrimination capability of the logistic regression model without using any additional information.

-
-
-

Original Code

-

The code is almost identical to the original code that can be found on github.

-
-

Preparing the Data

-
-
library(caret)
-library(tidymodels)
-library(ggplot2)
-
-
-data(segmentationData)
-
-segmentationData <- 
-  segmentationData %>% 
-  dplyr::select(EqSphereAreaCh1, PerimCh1, Class, Case) %>% 
-  setNames(c("PredictorA", "PredictorB", "Class", "Case")) %>% 
-  mutate(Class = factor(ifelse(Class == "PS", "One", "Two")))
-
-example_train <- 
-  segmentationData %>% 
-  dplyr::filter(Case == "Train") %>% 
-  dplyr::select(-Case)
-
-example_test  <- 
-  segmentationData %>% 
-  dplyr::filter(Case == "Test") %>% 
-  dplyr::select(-Case)
-
-
-
-

Training the Models

-
-
example_ctrl <- 
-  trainControl(method = "none",
-               classProbs = TRUE,
-               summaryFunction = twoClassSummary)
-
-natural_terms <- train(Class ~ PredictorA + PredictorB,
-                       data = example_train,
-                       method = "glm",
-                       metric = "ROC",
-                       trControl = example_ctrl)
-
-trans_terms <- train(Class ~ PredictorA + PredictorB,
-                     data = example_train,
-                     method = "glm",
-                     preProc = "BoxCox",
-                     metric = "ROC",
-                     trControl = example_ctrl)
-
-
-
-

Creating Predictions

-
-
original_predictions <- predict(natural_terms, example_test, type = "prob")[,1]
-
-transformed_predictions <- predict(trans_terms, example_test, type = "prob")[,1]
-
-outcomes <- example_test$Class == "One"
-
-
-
-

Creating ROC Curve with yardstick

-
-
natural_dat <-
-  example_test %>% 
-  mutate(
-    prob = original_predictions) %>% 
-  roc_curve(Class, prob) %>% 
-  mutate(Format = "Natural Units")
-
-trans_dat <-
-  example_test %>% 
-  mutate(
-    prob = transformed_predictions) %>% 
-  roc_curve(Class, prob) %>% 
-  mutate(Format = "Inverse Units") 
-
-both_dat <- 
-  bind_rows(natural_dat, trans_dat) %>%
-  mutate(
-    Format = factor(Format, levels = c("Natural Units", "Inverse Units")))
-
-trans_roc_plot <- 
-  ggplot(both_dat) +
-  geom_step(aes(x = 1 - specificity, y = sensitivity, color = Format)) + 
-  coord_equal() + 
-  xlab("False Positive Rate") + 
-  ylab("True Positive Rate") + 
-  theme(legend.position = c(.8, .2)) + 
-  scale_colour_manual(
-    values = c("Natural Units" = "grey", 
-               "Inverse Units" = "black")) + 
-  geom_abline(intercept = 0, slope = 1, col = "grey", lty = 2) +
-  theme_classic()
-
-trans_roc_plot
-
-

-
-
-
-

rtichoke code

-
-

Creating ROC Curve with rtichoke

-
- -
-
-
-
-
-

Code

-
library(rtichoke)
-
-create_roc_curve(
-  probs = list(
-    "Natural Units" = original_predictions,
-    "Inverse Units" = transformed_predictions
-  ),
-  reals = list(
-    outcomes
-  ),
-  size = 400, 
-  col_values = c("grey", "black")
-) 
-
-
-

Output

-
-
- -
-
-
-
-
-
-
-
-
-

Code

-
library(rtichoke)
-
-create_roc_curve(
-  probs = list(
-    "Natural Units" = original_predictions,
-    "Inverse Units" = transformed_predictions
-  ),
-  reals = list(
-    outcomes
-  ),
-  stratified_by = "ppcr",
-  size = 400, 
-  col_values = c("grey", "black")
-) 
-
-
-

Output

-
-
- -
-
-
-
-
-
-
- - -
-
- -
- -
- - - - \ No newline at end of file diff --git a/_site/posts/2022-06-06-cox-box-transformation/trans_roc_plot.svg b/_site/posts/2022-06-06-cox-box-transformation/trans_roc_plot.svg deleted file mode 100644 index b3d40a9..0000000 --- a/_site/posts/2022-06-06-cox-box-transformation/trans_roc_plot.svg +++ /dev/null @@ -1,79 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -False Positive Rate -True Positive Rate - - -Format - - -Natural Units -Inverse Units - - diff --git a/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/conventional_decision.svg b/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/conventional_decision.svg deleted file mode 100644 index fdc21ea..0000000 --- a/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/conventional_decision.svg +++ /dev/null @@ -1,62 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.0 -0.2 -0.4 -0.6 - - - - - - - - - - -0% -25% -50% -75% -100% -Threshold Probability -Net Benefit - - diff --git a/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/conventional_decision_15_35.svg b/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/conventional_decision_15_35.svg deleted file mode 100644 index 4b9674b..0000000 --- a/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/conventional_decision_15_35.svg +++ /dev/null @@ -1,62 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.0 -0.2 -0.4 -0.6 - - - - - - - - - - -15% -20% -25% -30% -35% -Threshold Probability -Net Benefit - - diff --git a/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/image.jpg b/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/image.jpg deleted file mode 100644 index 5302309..0000000 Binary files a/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/image.jpg and /dev/null differ diff --git a/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html b/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html deleted file mode 100644 index aba6390..0000000 --- a/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html +++ /dev/null @@ -1,561 +0,0 @@ - - - - - - - - - - -rtichoke blog - DCA for Quantifying the Additional Benefit of a New Marker by Emily Vertosick and Andrew Vickers - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
-
-
-

DCA for Quantifying the Additional Benefit of a New Marker by Emily Vertosick and Andrew Vickers

-
-
Replications
-
Decision
-
Emily Vertosick
-
Andrew Vickers
-
gt
-
gtsummary
-
dcurves
-
rms
-
Hmisc
-
-
-
- - -
- - -
-
Published
-
-

July 5, 2022

-
-
- -
- - -
- - - - -
- - - - -
-

Additional Benefit of a New Marker

-

Prediction Model might gain accuracy if you’ll add more relevant features to existing models, but many times it’s not obvious what is the additional value of additional feature and how to quantify it in terms of Decision Making. The post Decision curve analysis for quantifying the additional benefit of a new marker by Emily Vertosick and Andrew Vickers show a simple example (the code presented here is almost identical to the original code presented in the link).

-
-
-

Preparing the Data

-
-

Loading the Data with Hmisc

-
-
library(Hmisc)
-library(dplyr)
-library(tibble)
-
-getHdata(acath)
-acath <- subset(acath, !is.na(choleste))
-
-
-
-

Fitting Logistic Regressions with rms

-
-
library(rms)
-
-pre <- lrm(sigdz ~ rcs(age,4) * sex, data = acath)
-pre_pred <- predict(pre, type='fitted')
-
-post <- lrm(sigdz ~ rcs(age,4) * sex + 
-              rcs(choleste,4) + rcs(age,4) %ia% rcs(choleste,4), data = acath)
-post_pred <- predict(post, type='fitted')
-
-acath_pred <- bind_cols(
-    acath,
-    pre_pred %>% enframe(name = NULL, value = "pre"),
-    post_pred %>% enframe(name = NULL, value = "post")
-  )
-
-
-
-
-

Conventional Decision Curve

-
- -
-
-
-
-
-
library(dcurves)
-
-dca_prepost <- dca(
-    sigdz ~ pre + post,
-    data = acath_pred,
-    label = list(
-      pre = "Age and Sex",
-      post = "Age, Sex and Cholesterol"))
-
-dca_prepost %>%
-  plot(smooth = TRUE)  + 
-  theme_classic()  +
-  theme(legend.position = "none")
-
-
-

-
-
-
-
-
-
-
-
-
library(rtichoke)
-library(plotly)
-
-performance_data_dc <- 
-  prepare_performance_data(
-  probs = list(
-    "Age and Sex" = 
-      acath_pred$pre,
-    "Age, Sex and Cholesterol" = 
-      acath_pred$post
-  ),
-  reals = list(acath_pred$sigdz)
-)
-
-performance_data_dc %>%
-  plot_decision_curve(
-    col_values = 
-      c("#00BFC4", "#C77CFF"),
-    size = 350
-  ) %>%
-  layout(
-    yaxis = list(
-      range =
-        c(-0.07, 0.7)
-    )
-  )
-
-
-
-
- -
-
-
-
-
-
-
-
-
-

Specific Range of Probability Thresholds

-
- -
-
-
-
-
-
library(dcurves)
-
-dca_prepost_15_35 <- dca(
-    sigdz ~ pre + post,
-    data = acath_pred,
-    thresholds = seq(0.15, 0.35, by = 0.05),
-    label = list(
-      pre = "Age and Sex",
-      post = "Age, Sex and Cholesterol")) %>%
-  plot(type = 'net_benefit', 
-       smooth = FALSE, 
-       show_ggplot_code = FALSE)
-
-dca_prepost_15_35 + 
-  theme_classic()  + 
-  theme(legend.position = "none")
-
-
-

-
-
-
-
-
-
-
-
-
performance_data_dc %>% 
-  rtichoke::plot_decision_curve(
-    col_values = c("#00BFC4", "#C77CFF"),
-    min_p_threshold = 0.15, 
-    max_p_threshold = 0.35,
-    size = 350
-  ) %>% 
-  plotly::layout(
-    yaxis = list(range =
-                   c(-0.07, 0.7))
-  ) 
-
-
-
-
- -
-
-
-
-
-
-
-
-
-

Interventions Avoided

-
- -
-
-
-
-
-
# code
-
-
-

-
-
-
-
-
-
-
-
-
performance_data_dc %>%
-  rtichoke::plot_decision_curve(
-    col_values = c("#F8766D", "#00BFC4"),
-    type = "interventions avoided",
-    size = 350
-  ) %>%
-  plotly::layout(
-    yaxis = list(range =
-                   c(-10, 100))
-  )
-
-
-
-
- -
-
-
-
-
-
-
-
-
-

Conventional and Interventions Avoided Combined (rtichoke code)

-
-
-
-
performance_data_dc %>%
-  plot_decision_curve(
-    col_values = 
-      c("#00BFC4", "#C77CFF"),
-    type = "combined",
-    size = 500
-  )
-
-
-
-
- -
-
-
-
- - -
- -
- -
- - - - \ No newline at end of file diff --git a/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/interventions_avoided.svg b/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/interventions_avoided.svg deleted file mode 100644 index 2c84019..0000000 --- a/_site/posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/interventions_avoided.svg +++ /dev/null @@ -1,63 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -25 -50 -75 -100 - - - - - - - - - - - -0% -25% -50% -75% -100% -Threshold Probability -Net reduction in interventions -per 100 patients - - diff --git a/_site/posts/2022-08-01-precision-recall/image.png b/_site/posts/2022-08-01-precision-recall/image.png deleted file mode 100644 index 6278b8a..0000000 Binary files a/_site/posts/2022-08-01-precision-recall/image.png and /dev/null differ diff --git a/_site/posts/2022-08-01-precision-recall/index.html b/_site/posts/2022-08-01-precision-recall/index.html deleted file mode 100644 index 302ad42..0000000 --- a/_site/posts/2022-08-01-precision-recall/index.html +++ /dev/null @@ -1,436 +0,0 @@ - - - - - - - - - - -rtichoke blog - Precision Recall from Feature Engineering by Max Kuhn and Kjell Johnson - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
-
-
-

Precision Recall from Feature Engineering by Max Kuhn and Kjell Johnson

-
-
Replications
-
ROC
-
Precision Recall
-
Max Kuhn
-
Kjell Johnson
-
Feature Engineering by Max Kuhn and Kjell Johnson
-
yardstick
-
-
-
- - -
- - -
-
Published
-
-

July 1, 2022

-
-
- -
- - -
- - - - -
- - - - -
-

Precision Recall and ROC

-

Precision Recall Curve is shown as an alternative to the known ROC curve in the “second part from the ‘Measuring Performance’ Chapter of Feature Engineering and Selection”. It is mentioned that this curve is more appropriate in terms of Information Retrieval.

-
-

Original Code

-

The code is almost identical to the original code that can be found on github.

-

Alternatively you can download the caret object from here and load it into the global environment by running the following command:

-
-
load("okc_glm_keyword.RData")
-
-
-

Creating Performance Metrics Curves with yardstick

-
- -
-
-
-
-
-
library(yardstick)
-library(ggplot2)
-library(magrittr)
-
-glm_keyword$pred  %>% 
-  roc_curve(obs, stem) %>% 
-  autoplot()  +
-  theme_classic()
-
-
-

-
-
-
-
-
-
-
-
-
library(yardstick)
-library(ggplot2)
-library(magrittr)
-
-glm_keyword$pred  %>% 
-  pr_curve(obs, stem) %>% 
-  autoplot()  +
-  theme_classic()
-
-
-

-
-
-
-
-
-
-
-
-

Creating Performance Metrics Curves with rtichoke

-
- -
-
-
-
-
-
library(rtichoke)
-
-create_roc_curve(
-  probs = list(
-    glm_keyword$pred$stem),
-  reals = list(
-    glm_keyword$pred$obs == "stem"),
-  size = 350
-)
-
-
-
-
- -
-
-
-
-
-
-
-
-
-
library(rtichoke)
-
-create_precision_recall_curve(
-  probs = list(
-    glm_keyword$pred$stem),
-  reals = list(
-    glm_keyword$pred$obs == "stem"),
-  size = 350
-)
-
-
-
-
- -
-
-
-
-
-
-
- - -
-
-
- -
- -
- - - - \ No newline at end of file diff --git a/_site/posts/2022-08-01-precision-recall/precision-recall-yardstick.svg b/_site/posts/2022-08-01-precision-recall/precision-recall-yardstick.svg deleted file mode 100644 index b00089c..0000000 --- a/_site/posts/2022-08-01-precision-recall/precision-recall-yardstick.svg +++ /dev/null @@ -1,70 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -recall -precision - - diff --git a/_site/posts/2022-08-01-precision-recall/roc-yardstick.svg b/_site/posts/2022-08-01-precision-recall/roc-yardstick.svg deleted file mode 100644 index 37573e5..0000000 --- a/_site/posts/2022-08-01-precision-recall/roc-yardstick.svg +++ /dev/null @@ -1,71 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -1 - specificity -sensitivity - - diff --git a/_site/posts/2022-09-04-precision-recall/image.png b/_site/posts/2022-09-04-precision-recall/image.png deleted file mode 100644 index 6278b8a..0000000 Binary files a/_site/posts/2022-09-04-precision-recall/image.png and /dev/null differ diff --git a/_site/posts/2022-09-04-precision-recall/index.html b/_site/posts/2022-09-04-precision-recall/index.html index ffa9999..6d7cebc 100644 --- a/_site/posts/2022-09-04-precision-recall/index.html +++ b/_site/posts/2022-09-04-precision-recall/index.html @@ -429,7 +429,7 @@

+

diff --git a/_site/posts/2022-09-18-From-Haifa-University-to-Clalit Innovation/image.png b/_site/posts/2022-09-18-From-Haifa-University-to-Clalit Innovation/image.png deleted file mode 100644 index 96cc9ca..0000000 Binary files a/_site/posts/2022-09-18-From-Haifa-University-to-Clalit Innovation/image.png and /dev/null differ diff --git a/_site/posts/2022-09-18-From-Haifa-University-to-Clalit Innovation/index.html b/_site/posts/2022-09-18-From-Haifa-University-to-Clalit Innovation/index.html index 1c82435..a11c82f 100644 --- a/_site/posts/2022-09-18-From-Haifa-University-to-Clalit Innovation/index.html +++ b/_site/posts/2022-09-18-From-Haifa-University-to-Clalit Innovation/index.html @@ -244,7 +244,7 @@

Section

} }); -
diff --git a/_site/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html b/_site/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html index 73dcfab..1154b0b 100644 --- a/_site/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html +++ b/_site/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html @@ -308,7 +308,7 @@

Conventional D c("#00BFC4", "#C77CFF"), size = 350 ) %>% - layout( + plotly::layout( yaxis = list( range = c(-0.07, 0.7) @@ -317,8 +317,8 @@

Conventional D

-
- +
+
@@ -364,7 +364,7 @@

S
performance_data_dc %>% 
-  rtichoke::plot_decision_curve(
+  plot_decision_curve(
     col_values = c("#00BFC4", "#C77CFF"),
     min_p_threshold = 0.15, 
     max_p_threshold = 0.35,
@@ -377,8 +377,8 @@ 

S

-
- +
+
@@ -396,7 +396,12 @@

Interventions Avoide
-
# code
+
dca_prepost %>%
+  net_intervention_avoided() %>% 
+  plot(type = 'net_intervention_avoided', 
+       smooth = FALSE)  + 
+  theme_classic()  +
+  theme(legend.position = "none")

@@ -409,7 +414,7 @@

Interventions Avoide
performance_data_dc %>%
-  rtichoke::plot_decision_curve(
+  plot_decision_curve(
     col_values = c("#F8766D", "#00BFC4"),
     type = "interventions avoided",
     size = 350
@@ -421,8 +426,8 @@ 

Interventions Avoide

-
- +
+
@@ -446,8 +451,8 @@

-
- +
+

@@ -554,7 +559,7 @@

+

diff --git a/_site/posts/post-with-code/a.html b/_site/posts/post-with-code/a.html deleted file mode 100644 index 5143ffc..0000000 --- a/_site/posts/post-with-code/a.html +++ /dev/null @@ -1,296 +0,0 @@ - - - - - - - - - - - -rtichoke blog - Post With Code - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
-
-
-

Post With Code

-
-
news
-
code
-
analysis
-
-
-
- - -
- -
-
Author
-
-

Harlow Malloc

-
-
- -
-
Published
-
-

July 25, 2022

-
-
- -
- - -
- - - - -
- - - - -

This is a post with executable code.

-
-
1 + 1
-
-
[1] 2
-
-
- - - -
- -
- - - - \ No newline at end of file diff --git a/_site/posts/post-with-code/image.PNG b/_site/posts/post-with-code/image.PNG deleted file mode 100644 index 3daab7a..0000000 Binary files a/_site/posts/post-with-code/image.PNG and /dev/null differ diff --git a/_site/posts/post-with-code/image.jpg b/_site/posts/post-with-code/image.jpg deleted file mode 100644 index 3ec04c8..0000000 Binary files a/_site/posts/post-with-code/image.jpg and /dev/null differ diff --git a/_site/posts/post-with-code/index.html b/_site/posts/post-with-code/index.html deleted file mode 100644 index fa1e3ec..0000000 --- a/_site/posts/post-with-code/index.html +++ /dev/null @@ -1,497 +0,0 @@ - - - - - - - - - - - -rtichoke blog - Post With Code - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
-
-
-

Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson

-
-
Replications
-
ROC
-
-
-
- - -
- -
-
Author
-
-

Harlow Malloc

-
-
- -
-
Published
-
-

June 6, 2012

-
-
- -
- - -
- - - - -
- - - - -
- -
-
-

R-specific examples here.asdfasdfdsad

-
-
-

Python-specific examples here.

-
-
-
-

If there are repeated groups on the same page, their tabs are synced:

-
-

Welcome to rtichoke blog!

-

This blog will be dedicated to the {rtichoke} package, which means that it will contain posts that are related to performance metrics and the possible related usability of {rtichoke}.

-
-
-

Replications

-

To make the package easier to use I plan to reproduce other people’s code with rtichoke, posts of this kind will be available under the category “replications”.

-

My first choice is to replicate the first example from the book “Feature Engineering and Selection: A Practical Approach for Predictive Models by Max Kuhn and Kjell Johnson”.

-

In this example you can see how Box-Cox transformation improves the discrimination capability of the logistic regression model without using any additional information.

-
-
-

Original Code

-

The code is almost identical to the original code that can be found on github.

-
-

Preparing the Data

-
-
library(caret)
-library(tidymodels)
-library(ggplot2)
-
-
-data(segmentationData)
-
-segmentationData <- 
-  segmentationData %>% 
-  dplyr::select(EqSphereAreaCh1, PerimCh1, Class, Case) %>% 
-  setNames(c("PredictorA", "PredictorB", "Class", "Case")) %>% 
-  mutate(Class = factor(ifelse(Class == "PS", "One", "Two")))
-
-example_train <- 
-  segmentationData %>% 
-  dplyr::filter(Case == "Train") %>% 
-  dplyr::select(-Case)
-
-example_test  <- 
-  segmentationData %>% 
-  dplyr::filter(Case == "Test") %>% 
-  dplyr::select(-Case)
-
-
-
-

Training the Models

-
-
example_ctrl <- 
-  trainControl(method = "none",
-               classProbs = TRUE,
-               summaryFunction = twoClassSummary)
-
-natural_terms <- train(Class ~ PredictorA + PredictorB,
-                       data = example_train,
-                       method = "glm",
-                       metric = "ROC",
-                       trControl = example_ctrl)
-
-trans_terms <- train(Class ~ PredictorA + PredictorB,
-                     data = example_train,
-                     method = "glm",
-                     preProc = "BoxCox",
-                     metric = "ROC",
-                     trControl = example_ctrl)
-
-
-
-

Creating ROC Curve with yardstick

-
-
natural_dat <-
-  example_test %>% 
-  mutate(
-    prob = predict(natural_terms, example_test, type = "prob")[,1]) %>% 
-  roc_curve(Class, prob) %>% 
-  mutate(Format = "Natural Units")
-
-trans_dat <-
-  example_test %>% 
-  mutate(
-    prob = predict(trans_terms, example_test, type = "prob")[,1]) %>% 
-  roc_curve(Class, prob) %>% 
-  mutate(Format = "Inverse Units") 
-
-both_dat <- 
-  bind_rows(natural_dat, trans_dat) %>%
-  mutate(
-    Format = factor(Format, levels = c("Natural Units", "Inverse Units")))
-
-trans_roc_plot <- 
-  ggplot(both_dat) +
-  geom_step(aes(x = 1 - specificity, y = sensitivity, color = Format)) + 
-  coord_equal() + 
-  xlab("False Positive Rate") + 
-  ylab("True Positive Rate") + 
-  theme(legend.position = c(.8, .2)) + 
-  scale_colour_manual(
-    values = c("Natural Units" = "grey", 
-               "Inverse Units" = "black")) + 
-  geom_abline(intercept = 0, slope = 1, col = "grey", lty = 2) +
-  theme_classic()
-
-trans_roc_plot
-
-

-
-
-
-

rtichoke code

-
-

Creating ROC Curve with rtichoke

-
-
-
-

By Probability Threshold

-
-
library(rtichoke)
-
-create_roc_curve(
-  probs = list(
-    "Natural Units" = predict(natural_terms, 
-                            example_test, type = "prob")[,1],
-    "Inverse Units" = predict(trans_terms, 
-                            example_test, type = "prob")[,1]
-  ),
-  reals = list(example_test$Class == "One"),
-  size = 400, 
-  col_values = c("grey", "black")
-) 
-
-
-
-
- -
-
-
-
-

By Percent Positives Conditional Rate

-
-
library(rtichoke)
-
-create_roc_curve(
-  probs = list(
-    "Natural Units" = predict(natural_terms, 
-                            example_test, type = "prob")[,1],
-    "Inverse Units" = predict(trans_terms, 
-                            example_test, type = "prob")[,1]
-  ),
-  reals = list(example_test$Class == "One"),
-  stratified_by = "ppcr",
-  size = 400, 
-  col_values = c("grey", "black")
-) 
-
-
-
-
- -
-
- - -
- -
- - -
- - - - \ No newline at end of file diff --git a/_site/posts/post-with-code/trans_roc_plot.svg b/_site/posts/post-with-code/trans_roc_plot.svg deleted file mode 100644 index b3d40a9..0000000 --- a/_site/posts/post-with-code/trans_roc_plot.svg +++ /dev/null @@ -1,79 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -False Positive Rate -True Positive Rate - - -Format - - -Natural Units -Inverse Units - - diff --git a/_site/posts/welcome/index.html b/_site/posts/welcome/index.html deleted file mode 100644 index 8437298..0000000 --- a/_site/posts/welcome/index.html +++ /dev/null @@ -1,259 +0,0 @@ - - - - - - - - - - - -rtichoke blog - Welcome To My Blog - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
-
-
-

Welcome To My Blog

-
-
news
-
-
-
- - -
- -
-
Author
-
-

Tristan O’Malley

-
-
- -
-
Published
-
-

August 16, 2022

-
-
- -
- - -
- - - - -
- - - - -

This is the first post in a Quarto blog. Welcome!

-

-

Since this post doesn’t specify an explicit image, the first image in the post will be used in the listing page of posts.

- - - -
- - -
- - - - \ No newline at end of file diff --git a/_site/posts/welcome/thumbnail.jpg b/_site/posts/welcome/thumbnail.jpg deleted file mode 100644 index 8e3107c..0000000 Binary files a/_site/posts/welcome/thumbnail.jpg and /dev/null differ diff --git a/_site/profile.jpg b/_site/profile.jpg deleted file mode 100644 index 9d50b91..0000000 Binary files a/_site/profile.jpg and /dev/null differ diff --git a/_site/search.json b/_site/search.json index ac2b197..e7f8b07 100644 --- a/_site/search.json +++ b/_site/search.json @@ -1,4 +1,11 @@ [ + { + "objectID": "about_me.html", + "href": "about_me.html", + "title": "Uriah Finkel", + "section": "", + "text": "Data Scientist in Clalit Innovation\nCo-Admin of Israeli R community “R for the Masses”" + }, { "objectID": "index.html", "href": "index.html", @@ -7,283 +14,80 @@ "text": "This blog is dedicated to reproducible examples by rtichoke in order to help new users to have a quick start. For more conventional documentation of the package please visit rtichoke pkgdown website.\nSome of the posts will be replications of existing code that produces the same output as rtichoke by different packages.\nOther posts will be more focused on theoretical issues, but always with a reproducible code.\nI will also share some talks that might be about everything as long as there are some relevant outputs by rtichoke (Most of the talks will be in Hebrew but the slides will always be in English)." }, { - "objectID": "about.html", - "href": "about.html", - "title": "About", - "section": "", - "text": "About this blog" - }, - { - "objectID": "posts/post-with-code/index.html", - "href": "posts/post-with-code/index.html", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "", - "text": "RPython\n\n\nR-specific examples here.asdfasdfdsad\n\n\nPython-specific examples here.\nIf there are repeated groups on the same page, their tabs are synced:" - }, - { - "objectID": "posts/welcome/index.html", - "href": "posts/welcome/index.html", - "title": "Welcome To My Blog", - "section": "", - "text": "Since this post doesn’t specify an explicit image, the first image in the post will be used in the listing page of posts." - }, - { - "objectID": "posts/post-with-code/a.html", - "href": "posts/post-with-code/a.html", - "title": "Post With Code", - "section": "", - "text": "1 + 1\n\n[1] 2" - }, - { - "objectID": "posts/post-with-code/index.html#welcome-to-rtichoke-blog", - "href": "posts/post-with-code/index.html#welcome-to-rtichoke-blog", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "Welcome to rtichoke blog!", - "text": "Welcome to rtichoke blog!\nThis blog will be dedicated to the {rtichoke} package, which means that it will contain posts that are related to performance metrics and the possible related usability of {rtichoke}." - }, - { - "objectID": "posts/post-with-code/index.html#replications", - "href": "posts/post-with-code/index.html#replications", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "Replications", - "text": "Replications\nTo make the package easier to use I plan to reproduce other people’s code with rtichoke, posts of this kind will be available under the category “replications”.\nMy first choice is to replicate the first example from the book “Feature Engineering and Selection: A Practical Approach for Predictive Models by Max Kuhn and Kjell Johnson”.\nIn this example you can see how Box-Cox transformation improves the discrimination capability of the logistic regression model without using any additional information." - }, - { - "objectID": "posts/post-with-code/index.html#original-code", - "href": "posts/post-with-code/index.html#original-code", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "Original Code", - "text": "Original Code\nThe code is almost identical to the original code that can be found on github.\n\nPreparing the Data\n\nlibrary(caret)\nlibrary(tidymodels)\nlibrary(ggplot2)\n\n\ndata(segmentationData)\n\nsegmentationData <- \n segmentationData %>% \n dplyr::select(EqSphereAreaCh1, PerimCh1, Class, Case) %>% \n setNames(c(\"PredictorA\", \"PredictorB\", \"Class\", \"Case\")) %>% \n mutate(Class = factor(ifelse(Class == \"PS\", \"One\", \"Two\")))\n\nexample_train <- \n segmentationData %>% \n dplyr::filter(Case == \"Train\") %>% \n dplyr::select(-Case)\n\nexample_test <- \n segmentationData %>% \n dplyr::filter(Case == \"Test\") %>% \n dplyr::select(-Case)\n\n\n\nTraining the Models\n\nexample_ctrl <- \n trainControl(method = \"none\",\n classProbs = TRUE,\n summaryFunction = twoClassSummary)\n\nnatural_terms <- train(Class ~ PredictorA + PredictorB,\n data = example_train,\n method = \"glm\",\n metric = \"ROC\",\n trControl = example_ctrl)\n\ntrans_terms <- train(Class ~ PredictorA + PredictorB,\n data = example_train,\n method = \"glm\",\n preProc = \"BoxCox\",\n metric = \"ROC\",\n trControl = example_ctrl)\n\n\n\nCreating ROC Curve with yardstick\n\nnatural_dat <-\n example_test %>% \n mutate(\n prob = predict(natural_terms, example_test, type = \"prob\")[,1]) %>% \n roc_curve(Class, prob) %>% \n mutate(Format = \"Natural Units\")\n\ntrans_dat <-\n example_test %>% \n mutate(\n prob = predict(trans_terms, example_test, type = \"prob\")[,1]) %>% \n roc_curve(Class, prob) %>% \n mutate(Format = \"Inverse Units\") \n\nboth_dat <- \n bind_rows(natural_dat, trans_dat) %>%\n mutate(\n Format = factor(Format, levels = c(\"Natural Units\", \"Inverse Units\")))\n\ntrans_roc_plot <- \n ggplot(both_dat) +\n geom_step(aes(x = 1 - specificity, y = sensitivity, color = Format)) + \n coord_equal() + \n xlab(\"False Positive Rate\") + \n ylab(\"True Positive Rate\") + \n theme(legend.position = c(.8, .2)) + \n scale_colour_manual(\n values = c(\"Natural Units\" = \"grey\", \n \"Inverse Units\" = \"black\")) + \n geom_abline(intercept = 0, slope = 1, col = \"grey\", lty = 2) +\n theme_classic()\n\ntrans_roc_plot" - }, - { - "objectID": "posts/post-with-code/index.html#rtichoke-code", - "href": "posts/post-with-code/index.html#rtichoke-code", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "rtichoke code", - "text": "rtichoke code\n\nCreating ROC Curve with rtichoke" - }, - { - "objectID": "posts/post-with-code/index.html#by-probability-threshold", - "href": "posts/post-with-code/index.html#by-probability-threshold", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "By Probability Threshold", - "text": "By Probability Threshold\n\nlibrary(rtichoke)\n\ncreate_roc_curve(\n probs = list(\n \"Natural Units\" = predict(natural_terms, \n example_test, type = \"prob\")[,1],\n \"Inverse Units\" = predict(trans_terms, \n example_test, type = \"prob\")[,1]\n ),\n reals = list(example_test$Class == \"One\"),\n size = 400, \n col_values = c(\"grey\", \"black\")\n)" - }, - { - "objectID": "posts/post-with-code/index.html#by-percent-positives-conditional-rate", - "href": "posts/post-with-code/index.html#by-percent-positives-conditional-rate", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "By Percent Positives Conditional Rate", - "text": "By Percent Positives Conditional Rate\n\nlibrary(rtichoke)\n\ncreate_roc_curve(\n probs = list(\n \"Natural Units\" = predict(natural_terms, \n example_test, type = \"prob\")[,1],\n \"Inverse Units\" = predict(trans_terms, \n example_test, type = \"prob\")[,1]\n ),\n reals = list(example_test$Class == \"One\"),\n stratified_by = \"ppcr\",\n size = 400, \n col_values = c(\"grey\", \"black\")\n)" - }, - { - "objectID": "posts/2022-06-06-cox-box-transformation/a.html", - "href": "posts/2022-06-06-cox-box-transformation/a.html", - "title": "Post With Code", - "section": "", - "text": "1 + 1\n\n[1] 2" - }, - { - "objectID": "posts/2022-06-06-cox-box-transformation/index.html", - "href": "posts/2022-06-06-cox-box-transformation/index.html", + "objectID": "posts/2022-08-21-cox-box-transformation/index.html", + "href": "posts/2022-08-21-cox-box-transformation/index.html", "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", "section": "", "text": "This blog will be dedicated to the {rtichoke} package, which means that it will contain posts that are related to performance metrics and the possible related usability of {rtichoke}." }, { - "objectID": "posts/2022-06-06-cox-box-transformation/index.html#welcome-to-rtichoke-blog", - "href": "posts/2022-06-06-cox-box-transformation/index.html#welcome-to-rtichoke-blog", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "Welcome to rtichoke blog!", - "text": "Welcome to rtichoke blog!\nThis blog will be dedicated to the {rtichoke} package, which means that it will contain posts that are related to performance metrics and the possible related usability of {rtichoke}." - }, - { - "objectID": "posts/2022-06-06-cox-box-transformation/index.html#replications", - "href": "posts/2022-06-06-cox-box-transformation/index.html#replications", + "objectID": "posts/2022-08-21-cox-box-transformation/index.html#replications", + "href": "posts/2022-08-21-cox-box-transformation/index.html#replications", "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", "section": "Replications", "text": "Replications\nTo make the package easier to use I plan to reproduce other people’s code with rtichoke, posts of this kind will be available under the category “replications”.\nMy first choice is to replicate the first example from the book “Feature Engineering and Selection: A Practical Approach for Predictive Models by Max Kuhn and Kjell Johnson”.\nIn this example you can see how Box-Cox transformation improves the discrimination capability of the logistic regression model without using any additional information." }, { - "objectID": "posts/2022-06-06-cox-box-transformation/index.html#original-code", - "href": "posts/2022-06-06-cox-box-transformation/index.html#original-code", + "objectID": "posts/2022-08-21-cox-box-transformation/index.html#original-code", + "href": "posts/2022-08-21-cox-box-transformation/index.html#original-code", "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", "section": "Original Code", "text": "Original Code\nThe code is almost identical to the original code that can be found on github.\n\nPreparing the Data\n\nlibrary(caret)\nlibrary(tidymodels)\nlibrary(ggplot2)\n\n\ndata(segmentationData)\n\nsegmentationData <- \n segmentationData %>% \n dplyr::select(EqSphereAreaCh1, PerimCh1, Class, Case) %>% \n setNames(c(\"PredictorA\", \"PredictorB\", \"Class\", \"Case\")) %>% \n mutate(Class = factor(ifelse(Class == \"PS\", \"One\", \"Two\")))\n\nexample_train <- \n segmentationData %>% \n dplyr::filter(Case == \"Train\") %>% \n dplyr::select(-Case)\n\nexample_test <- \n segmentationData %>% \n dplyr::filter(Case == \"Test\") %>% \n dplyr::select(-Case)\n\n\n\nTraining the Models\n\nexample_ctrl <- \n trainControl(method = \"none\",\n classProbs = TRUE,\n summaryFunction = twoClassSummary)\n\nnatural_terms <- train(Class ~ PredictorA + PredictorB,\n data = example_train,\n method = \"glm\",\n metric = \"ROC\",\n trControl = example_ctrl)\n\ntrans_terms <- train(Class ~ PredictorA + PredictorB,\n data = example_train,\n method = \"glm\",\n preProc = \"BoxCox\",\n metric = \"ROC\",\n trControl = example_ctrl)\n\n\n\nCreating Predictions\n\noriginal_probs <- predict(natural_terms, example_test, type = \"prob\")[,1]\n\ntransformed_probs <- predict(trans_terms, example_test, type = \"prob\")[,1]\n\noutcomes <- example_test$Class == \"One\"\n\n\n\nCreating ROC Curve with yardstick\n\nnatural_dat <-\n example_test %>% \n mutate(\n prob = original_probs) %>% \n roc_curve(Class, prob) %>% \n mutate(Format = \"Natural Units\")\n\ntrans_dat <-\n example_test %>% \n mutate(\n prob = transformed_probs) %>% \n roc_curve(Class, prob) %>% \n mutate(Format = \"Inverse Units\") \n\nboth_dat <- \n bind_rows(natural_dat, trans_dat) %>%\n mutate(\n Format = factor(Format, levels = c(\"Natural Units\", \"Inverse Units\")))\n\ntrans_roc_plot <- \n ggplot(both_dat) +\n geom_step(aes(x = 1 - specificity, y = sensitivity, color = Format)) + \n coord_equal() + \n xlab(\"False Positive Rate\") + \n ylab(\"True Positive Rate\") + \n theme(legend.position = c(.8, .2)) + \n scale_colour_manual(\n values = c(\"Natural Units\" = \"grey\", \n \"Inverse Units\" = \"black\")) + \n geom_abline(intercept = 0, slope = 1, col = \"grey\", lty = 2) +\n theme_classic()\n\ntrans_roc_plot" }, { - "objectID": "posts/2022-06-06-cox-box-transformation/index.html#rtichoke-code", - "href": "posts/2022-06-06-cox-box-transformation/index.html#rtichoke-code", + "objectID": "posts/2022-08-21-cox-box-transformation/index.html#rtichoke-code", + "href": "posts/2022-08-21-cox-box-transformation/index.html#rtichoke-code", "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", "section": "rtichoke code", "text": "rtichoke code\n\nCreating ROC Curve with rtichoke\n\nBy Probability ThresholdBy Percent Positives Conditional Rate\n\n\n\n\n\nlibrary(rtichoke)\n\ncreate_roc_curve(\n probs = list(\n \"Natural Units\" = original_probs,\n \"Inverse Units\" = transformed_probs\n ),\n reals = list(\n outcomes\n ),\n size = 350, \n col_values = c(\"grey\", \"black\")\n) \n\n\n\n\n\n\n\n\n\n\n\n\n\n\nlibrary(rtichoke)\n\ncreate_roc_curve(\n probs = list(\n \"Natural Units\" = original_probs,\n \"Inverse Units\" = transformed_probs\n ),\n reals = list(\n outcomes\n ),\n stratified_by = \"ppcr\",\n size = 350, \n col_values = c(\"grey\", \"black\")\n)" }, { - "objectID": "posts/2022-06-06-cox-box-transformation/index.html#by-probability-threshold", - "href": "posts/2022-06-06-cox-box-transformation/index.html#by-probability-threshold", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "By Probability Threshold", - "text": "By Probability Threshold\n\nlibrary(rtichoke)\n\ncreate_roc_curve(\n probs = list(\n \"Natural Units\" = predict(natural_terms, \n example_test, type = \"prob\")[,1],\n \"Inverse Units\" = predict(trans_terms, \n example_test, type = \"prob\")[,1]\n ),\n reals = list(example_test$Class == \"One\"),\n size = 400, \n col_values = c(\"grey\", \"black\")\n)" - }, - { - "objectID": "posts/2022-06-06-cox-box-transformation/index.html#by-percent-positives-conditional-rate", - "href": "posts/2022-06-06-cox-box-transformation/index.html#by-percent-positives-conditional-rate", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "By Percent Positives Conditional Rate", - "text": "By Percent Positives Conditional Rate\n\nlibrary(rtichoke)\n\ncreate_roc_curve(\n probs = list(\n \"Natural Units\" = predict(natural_terms, \n example_test, type = \"prob\")[,1],\n \"Inverse Units\" = predict(trans_terms, \n example_test, type = \"prob\")[,1]\n ),\n reals = list(example_test$Class == \"One\"),\n stratified_by = \"ppcr\",\n size = 400, \n col_values = c(\"grey\", \"black\")\n)" - }, - { - "objectID": "posts/2022-06-06-cox-box-transformation/index.html#bla", - "href": "posts/2022-06-06-cox-box-transformation/index.html#bla", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "bla", - "text": "bla\n\n\nList One\n\nItem A\nItem B\nItem C\n\n\n\nList Two\n\nItem X\nItem Y\nItem Z" - }, - { - "objectID": "posts/2022-06-06-cox-box-transformation/index.knit.html", - "href": "posts/2022-06-06-cox-box-transformation/index.knit.html", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "", - "text": "If there are repeated groups on the same page, their tabs are synced:" - }, - { - "objectID": "posts/2022-06-06-cox-box-transformation/index.knit.html#welcome-to-rtichoke-blog", - "href": "posts/2022-06-06-cox-box-transformation/index.knit.html#welcome-to-rtichoke-blog", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "Welcome to rtichoke blog!", - "text": "Welcome to rtichoke blog!\nThis blog will be dedicated to the {rtichoke} package, which means that it will contain posts that are related to performance metrics and the possible related usability of {rtichoke}." - }, - { - "objectID": "posts/2022-06-06-cox-box-transformation/index.knit.html#replications", - "href": "posts/2022-06-06-cox-box-transformation/index.knit.html#replications", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "Replications", - "text": "Replications\nTo make the package easier to use I plan to reproduce other people’s code with rtichoke, posts of this kind will be available under the category “replications”.\nMy first choice is to replicate the first example from the book “Feature Engineering and Selection: A Practical Approach for Predictive Models by Max Kuhn and Kjell Johnson”.\nIn this example you can see how Box-Cox transformation improves the discrimination capability of the logistic regression model without using any additional information." - }, - { - "objectID": "posts/2022-06-06-cox-box-transformation/index.knit.html#original-code", - "href": "posts/2022-06-06-cox-box-transformation/index.knit.html#original-code", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "Original Code", - "text": "Original Code\nThe code is almost identical to the original code that can be found on github.\n\nPreparing the Data\n\nlibrary(caret)\nlibrary(tidymodels)\nlibrary(ggplot2)\n\n\ndata(segmentationData)\n\nsegmentationData <- \n segmentationData %>% \n dplyr::select(EqSphereAreaCh1, PerimCh1, Class, Case) %>% \n setNames(c(\"PredictorA\", \"PredictorB\", \"Class\", \"Case\")) %>% \n mutate(Class = factor(ifelse(Class == \"PS\", \"One\", \"Two\")))\n\nexample_train <- \n segmentationData %>% \n dplyr::filter(Case == \"Train\") %>% \n dplyr::select(-Case)\n\nexample_test <- \n segmentationData %>% \n dplyr::filter(Case == \"Test\") %>% \n dplyr::select(-Case)\n\n\n\nTraining the Models\n\nexample_ctrl <- \n trainControl(method = \"none\",\n classProbs = TRUE,\n summaryFunction = twoClassSummary)\n\nnatural_terms <- train(Class ~ PredictorA + PredictorB,\n data = example_train,\n method = \"glm\",\n metric = \"ROC\",\n trControl = example_ctrl)\n\ntrans_terms <- train(Class ~ PredictorA + PredictorB,\n data = example_train,\n method = \"glm\",\n preProc = \"BoxCox\",\n metric = \"ROC\",\n trControl = example_ctrl)\n\n\n\nCreating Predictions\n\noriginal_predictions <- predict(natural_terms, example_test, type = \"prob\")[,1]\n\ntransformed_predictions <- predict(trans_terms, example_test, type = \"prob\")[,1]\n\noutcomes <- example_test$Class == \"One\"\n\n\n\nCreating ROC Curve with yardstick\n\nnatural_dat <-\n example_test %>% \n mutate(\n prob = original_predictions) %>% \n roc_curve(Class, prob) %>% \n mutate(Format = \"Natural Units\")\n\ntrans_dat <-\n example_test %>% \n mutate(\n prob = transformed_predictions) %>% \n roc_curve(Class, prob) %>% \n mutate(Format = \"Inverse Units\") \n\nboth_dat <- \n bind_rows(natural_dat, trans_dat) %>%\n mutate(\n Format = factor(Format, levels = c(\"Natural Units\", \"Inverse Units\")))\n\ntrans_roc_plot <- \n ggplot(both_dat) +\n geom_step(aes(x = 1 - specificity, y = sensitivity, color = Format)) + \n coord_equal() + \n xlab(\"False Positive Rate\") + \n ylab(\"True Positive Rate\") + \n theme(legend.position = c(.8, .2)) + \n scale_colour_manual(\n values = c(\"Natural Units\" = \"grey\", \n \"Inverse Units\" = \"black\")) + \n geom_abline(intercept = 0, slope = 1, col = \"grey\", lty = 2) +\n theme_classic()\n\ntrans_roc_plot" - }, - { - "objectID": "posts/2022-06-06-cox-box-transformation/index.knit.html#rtichoke-code", - "href": "posts/2022-06-06-cox-box-transformation/index.knit.html#rtichoke-code", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "rtichoke code", - "text": "rtichoke code\n\nCreating ROC Curve with rtichoke\n\nBy Probability ThresholdBy Percent Positives Conditional Rate\n\n\n\n\n\nCode\nlibrary(rtichoke)\n\ncreate_roc_curve(\n probs = list(\n \"Natural Units\" = original_predictions,\n \"Inverse Units\" = transformed_predictions\n ),\n reals = list(\n outcomes\n ),\n size = 400, \n col_values = c(\"grey\", \"black\")\n) \n\n\nOutput\n\n\n\n\n\n\n\n\n\n\n\n\nCode\nlibrary(rtichoke)\n\ncreate_roc_curve(\n probs = list(\n \"Natural Units\" = original_predictions,\n \"Inverse Units\" = transformed_predictions\n ),\n reals = list(\n outcomes\n ),\n stratified_by = \"ppcr\",\n size = 400, \n col_values = c(\"grey\", \"black\")\n) \n\n\nOutput" - }, - { - "objectID": "blog.html", - "href": "blog.html", - "title": "rtichoke blog", - "section": "", - "text": "Replications\n\n\nROC\n\n\nPrecision Recall\n\n\n\n\n\n\n\n\n\n\n\nJul 1, 2022\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\nReplications\n\n\nROC\n\n\n\n\n\n\n\n\n\n\n\nJun 6, 2012\n\n\n\n\n\n\nNo matching items" - }, - { - "objectID": "posts.html", - "href": "posts.html", - "title": "Posts", - "section": "", - "text": "Replications\n\n\nROC\n\n\nMax Kuhn\n\n\nKjell Johnson\n\n\nFeature Engineering by Max Kuhn and Kjell Johnson\n\n\ncaret\n\n\n\n\n\n\n\n\n\n\n\nAug 21, 2022\n\n\n\n\n\n\nNo matching items" - }, - { - "objectID": "about_me.html", - "href": "about_me.html", - "title": "Uriah Finkel", - "section": "", - "text": "Author of rtichoke\nData Scientist in Clalit Innovation\nCo-Admin of Israeli R community “R for the Masses”" - }, - { - "objectID": "about_me.html#education", - "href": "about_me.html#education", - "title": "Finley Malloc", - "section": "Education", - "text": "Education\nUniversity of Haifa | Israel M.A in Statistics\nUniversity of Haifa | Israel B.A in Statistics & Economics" - }, - { - "objectID": "posts/2022-08-01-precision-recall/index.html", - "href": "posts/2022-08-01-precision-recall/index.html", - "title": "Precision Recall from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "", - "text": "Precision Recall Curve is shown as an alternative to the known ROC curve in the “second part from the ‘Measuring Performance’ Chapter of Feature Engineering and Selection”. It is mentioned that this curve is more appropriate in terms of Information Retrieval.\n\n\nThe code is almost identical to the original code that can be found on github.\nAlternatively you can download the caret object from here and load it into the global environment by running the following command:\n\nload(\"okc_glm_keyword.RData\")\n\n\n\n\nROCPrecision Recall\n\n\n\n\n\nlibrary(yardstick)\nlibrary(ggplot2)\nlibrary(magrittr)\n\nglm_keyword$pred %>% \n roc_curve(obs, stem) %>% \n autoplot() +\n theme_classic()\n\n\n\n\n\n\n\n\n\n\n\nlibrary(yardstick)\nlibrary(ggplot2)\nlibrary(magrittr)\n\nglm_keyword$pred %>% \n pr_curve(obs, stem) %>% \n autoplot() +\n theme_classic()\n\n\n\n\n\n\n\n\n\n\n\n\n\nROCPrecision Recall\n\n\n\n\n\nlibrary(rtichoke)\n\ncreate_roc_curve(\n probs = list(\n glm_keyword$pred$stem),\n reals = list(\n glm_keyword$pred$obs == \"stem\"),\n size = 350\n)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nlibrary(rtichoke)\n\ncreate_precision_recall_curve(\n probs = list(\n glm_keyword$pred$stem),\n reals = list(\n glm_keyword$pred$obs == \"stem\"),\n size = 350\n)" - }, - { - "objectID": "posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html", - "href": "posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html", + "objectID": "posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html", + "href": "posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html", "title": "DCA for Quantifying the Additional Benefit of a New Marker by Emily Vertosick and Andrew Vickers", "section": "", "text": "Prediction Model might gain accuracy if you’ll add more relevant features to existing models, but many times it’s not obvious what is the additional value of additional feature and how to quantify it in terms of Decision Making. The post Decision curve analysis for quantifying the additional benefit of a new marker by Emily Vertosick and Andrew Vickers show a simple example (the code presented here is almost identical to the original code presented in the link)." }, { - "objectID": "posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#preparing-the-data", - "href": "posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#preparing-the-data", + "objectID": "posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#preparing-the-data", + "href": "posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#preparing-the-data", "title": "DCA for Quantifying the Additional Benefit of a New Marker by Emily Vertosick and Andrew Vickers", "section": "Preparing the Data", "text": "Preparing the Data\n\nLoading the Data with Hmisc\n\nlibrary(Hmisc)\nlibrary(dplyr)\nlibrary(tibble)\n\ngetHdata(acath)\nacath <- subset(acath, !is.na(choleste))\n\n\n\nFitting Logistic Regressions with rms\n\nlibrary(rms)\n\npre <- lrm(sigdz ~ rcs(age,4) * sex, data = acath)\npre_pred <- predict(pre, type='fitted')\n\npost <- lrm(sigdz ~ rcs(age,4) * sex + \n rcs(choleste,4) + rcs(age,4) %ia% rcs(choleste,4), data = acath)\npost_pred <- predict(post, type='fitted')\n\nacath_pred <- bind_cols(\n acath,\n pre_pred %>% enframe(name = NULL, value = \"pre\"),\n post_pred %>% enframe(name = NULL, value = \"post\")\n )" }, { - "objectID": "posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#conventional-decision-curve", - "href": "posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#conventional-decision-curve", + "objectID": "posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#conventional-decision-curve", + "href": "posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#conventional-decision-curve", "title": "DCA for Quantifying the Additional Benefit of a New Marker by Emily Vertosick and Andrew Vickers", "section": "Conventional Decision Curve", - "text": "Conventional Decision Curve\n\ndcurvesrtichoke\n\n\n\n\n\nlibrary(dcurves)\n\ndca_prepost <- dca(\n sigdz ~ pre + post,\n data = acath_pred,\n label = list(\n pre = \"Age and Sex\",\n post = \"Age, Sex and Cholesterol\"))\n\ndca_prepost %>%\n plot(smooth = TRUE) + \n theme_classic() +\n theme(legend.position = \"none\")\n\n\n\n\n\n\n\n\n\n\n\nlibrary(rtichoke)\nlibrary(plotly)\n\nperformance_data_dc <- \n prepare_performance_data(\n probs = list(\n \"Age and Sex\" = \n acath_pred$pre,\n \"Age, Sex and Cholesterol\" = \n acath_pred$post\n ),\n reals = list(acath_pred$sigdz)\n)\n\nperformance_data_dc %>%\n plot_decision_curve(\n col_values = \n c(\"#00BFC4\", \"#C77CFF\"),\n size = 350\n ) %>%\n layout(\n yaxis = list(\n range =\n c(-0.07, 0.7)\n )\n )" + "text": "Conventional Decision Curve\n\ndcurvesrtichoke\n\n\n\n\n\nlibrary(dcurves)\n\ndca_prepost <- dca(\n sigdz ~ pre + post,\n data = acath_pred,\n label = list(\n pre = \"Age and Sex\",\n post = \"Age, Sex and Cholesterol\"))\n\ndca_prepost %>%\n plot(smooth = TRUE) + \n theme_classic() +\n theme(legend.position = \"none\")\n\n\n\n\n\n\n\n\n\n\n\nlibrary(rtichoke)\nlibrary(plotly)\n\nperformance_data_dc <- \n prepare_performance_data(\n probs = list(\n \"Age and Sex\" = \n acath_pred$pre,\n \"Age, Sex and Cholesterol\" = \n acath_pred$post\n ),\n reals = list(acath_pred$sigdz)\n)\n\nperformance_data_dc %>%\n plot_decision_curve(\n col_values = \n c(\"#00BFC4\", \"#C77CFF\"),\n size = 350\n ) %>%\n plotly::layout(\n yaxis = list(\n range =\n c(-0.07, 0.7)\n )\n )" }, { - "objectID": "posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#specific-range-of-probability-thresholds", - "href": "posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#specific-range-of-probability-thresholds", + "objectID": "posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#specific-range-of-probability-thresholds", + "href": "posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#specific-range-of-probability-thresholds", "title": "DCA for Quantifying the Additional Benefit of a New Marker by Emily Vertosick and Andrew Vickers", "section": "Specific Range of Probability Thresholds", - "text": "Specific Range of Probability Thresholds\n\ndcurvesrtichoke\n\n\n\n\n\nlibrary(dcurves)\n\ndca_prepost_15_35 <- dca(\n sigdz ~ pre + post,\n data = acath_pred,\n thresholds = seq(0.15, 0.35, by = 0.05),\n label = list(\n pre = \"Age and Sex\",\n post = \"Age, Sex and Cholesterol\")) %>%\n plot(type = 'net_benefit', \n smooth = FALSE, \n show_ggplot_code = FALSE)\n\ndca_prepost_15_35 + \n theme_classic() + \n theme(legend.position = \"none\")\n\n\n\n\n\n\n\n\n\n\n\nperformance_data_dc %>% \n rtichoke::plot_decision_curve(\n col_values = c(\"#00BFC4\", \"#C77CFF\"),\n min_p_threshold = 0.15, \n max_p_threshold = 0.35,\n size = 350\n ) %>% \n plotly::layout(\n yaxis = list(range =\n c(-0.07, 0.7))\n )" + "text": "Specific Range of Probability Thresholds\n\ndcurvesrtichoke\n\n\n\n\n\nlibrary(dcurves)\n\ndca_prepost_15_35 <- dca(\n sigdz ~ pre + post,\n data = acath_pred,\n thresholds = seq(0.15, 0.35, by = 0.05),\n label = list(\n pre = \"Age and Sex\",\n post = \"Age, Sex and Cholesterol\")) %>%\n plot(type = 'net_benefit', \n smooth = FALSE, \n show_ggplot_code = FALSE)\n\ndca_prepost_15_35 + \n theme_classic() + \n theme(legend.position = \"none\")\n\n\n\n\n\n\n\n\n\n\n\nperformance_data_dc %>% \n plot_decision_curve(\n col_values = c(\"#00BFC4\", \"#C77CFF\"),\n min_p_threshold = 0.15, \n max_p_threshold = 0.35,\n size = 350\n ) %>% \n plotly::layout(\n yaxis = list(range =\n c(-0.07, 0.7))\n )" }, { - "objectID": "posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#interventions-avoided", - "href": "posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#interventions-avoided", + "objectID": "posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#interventions-avoided", + "href": "posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#interventions-avoided", "title": "DCA for Quantifying the Additional Benefit of a New Marker by Emily Vertosick and Andrew Vickers", "section": "Interventions Avoided", - "text": "Interventions Avoided\n\ndcurvesrtichoke\n\n\n\n\n\n# code\n\n\n\n\n\n\n\n\n\n\n\nperformance_data_dc %>%\n rtichoke::plot_decision_curve(\n col_values = c(\"#F8766D\", \"#00BFC4\"),\n type = \"interventions avoided\",\n size = 350\n ) %>%\n plotly::layout(\n yaxis = list(range =\n c(-10, 100))\n )" + "text": "Interventions Avoided\n\ndcurvesrtichoke\n\n\n\n\n\ndca_prepost %>%\n net_intervention_avoided() %>% \n plot(type = 'net_intervention_avoided', \n smooth = FALSE) + \n theme_classic() +\n theme(legend.position = \"none\")\n\n\n\n\n\n\n\n\n\n\n\nperformance_data_dc %>%\n plot_decision_curve(\n col_values = c(\"#F8766D\", \"#00BFC4\"),\n type = \"interventions avoided\",\n size = 350\n ) %>%\n plotly::layout(\n yaxis = list(range =\n c(-10, 100))\n )" }, { - "objectID": "posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#conventional-and-interventions-avoided-combined-rtichoke-code", - "href": "posts/2022-07-05-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#conventional-and-interventions-avoided-combined-rtichoke-code", + "objectID": "posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#conventional-and-interventions-avoided-combined-rtichoke-code", + "href": "posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.html#conventional-and-interventions-avoided-combined-rtichoke-code", "title": "DCA for Quantifying the Additional Benefit of a New Marker by Emily Vertosick and Andrew Vickers", "section": "Conventional and Interventions Avoided Combined (rtichoke code)", "text": "Conventional and Interventions Avoided Combined (rtichoke code)\n\n\n\nperformance_data_dc %>%\n plot_decision_curve(\n col_values = \n c(\"#00BFC4\", \"#C77CFF\"),\n type = \"combined\",\n size = 500\n )" }, { - "objectID": "posts/2022-04-05-From-Haifa-University-to-Clalit Innovation/index.html", - "href": "posts/2022-04-05-From-Haifa-University-to-Clalit Innovation/index.html", - "title": "From Haifa University to Clalit innovation", - "section": "", - "text": "https://haifa-to-clalit-innovation.netlify.app/journey_haifa_to_cri/journey_haifa_to_cri.html#1" - }, - { - "objectID": "posts/2022-08-21-cox-box-transformation/index.html", - "href": "posts/2022-08-21-cox-box-transformation/index.html", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", + "objectID": "posts.html", + "href": "posts.html", + "title": "Posts", "section": "", - "text": "This blog will be dedicated to the {rtichoke} package, which means that it will contain posts that are related to performance metrics and the possible related usability of {rtichoke}." - }, - { - "objectID": "posts/2022-08-21-cox-box-transformation/index.html#replications", - "href": "posts/2022-08-21-cox-box-transformation/index.html#replications", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "Replications", - "text": "Replications\nTo make the package easier to use I plan to reproduce other people’s code with rtichoke, posts of this kind will be available under the category “replications”.\nMy first choice is to replicate the first example from the book “Feature Engineering and Selection: A Practical Approach for Predictive Models by Max Kuhn and Kjell Johnson”.\nIn this example you can see how Box-Cox transformation improves the discrimination capability of the logistic regression model without using any additional information." - }, - { - "objectID": "posts/2022-08-21-cox-box-transformation/index.html#original-code", - "href": "posts/2022-08-21-cox-box-transformation/index.html#original-code", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "Original Code", - "text": "Original Code\nThe code is almost identical to the original code that can be found on github.\n\nPreparing the Data\n\nlibrary(caret)\nlibrary(tidymodels)\nlibrary(ggplot2)\n\n\ndata(segmentationData)\n\nsegmentationData <- \n segmentationData %>% \n dplyr::select(EqSphereAreaCh1, PerimCh1, Class, Case) %>% \n setNames(c(\"PredictorA\", \"PredictorB\", \"Class\", \"Case\")) %>% \n mutate(Class = factor(ifelse(Class == \"PS\", \"One\", \"Two\")))\n\nexample_train <- \n segmentationData %>% \n dplyr::filter(Case == \"Train\") %>% \n dplyr::select(-Case)\n\nexample_test <- \n segmentationData %>% \n dplyr::filter(Case == \"Test\") %>% \n dplyr::select(-Case)\n\n\n\nTraining the Models\n\nexample_ctrl <- \n trainControl(method = \"none\",\n classProbs = TRUE,\n summaryFunction = twoClassSummary)\n\nnatural_terms <- train(Class ~ PredictorA + PredictorB,\n data = example_train,\n method = \"glm\",\n metric = \"ROC\",\n trControl = example_ctrl)\n\ntrans_terms <- train(Class ~ PredictorA + PredictorB,\n data = example_train,\n method = \"glm\",\n preProc = \"BoxCox\",\n metric = \"ROC\",\n trControl = example_ctrl)\n\n\n\nCreating Predictions\n\noriginal_probs <- predict(natural_terms, example_test, type = \"prob\")[,1]\n\ntransformed_probs <- predict(trans_terms, example_test, type = \"prob\")[,1]\n\noutcomes <- example_test$Class == \"One\"\n\n\n\nCreating ROC Curve with yardstick\n\nnatural_dat <-\n example_test %>% \n mutate(\n prob = original_probs) %>% \n roc_curve(Class, prob) %>% \n mutate(Format = \"Natural Units\")\n\ntrans_dat <-\n example_test %>% \n mutate(\n prob = transformed_probs) %>% \n roc_curve(Class, prob) %>% \n mutate(Format = \"Inverse Units\") \n\nboth_dat <- \n bind_rows(natural_dat, trans_dat) %>%\n mutate(\n Format = factor(Format, levels = c(\"Natural Units\", \"Inverse Units\")))\n\ntrans_roc_plot <- \n ggplot(both_dat) +\n geom_step(aes(x = 1 - specificity, y = sensitivity, color = Format)) + \n coord_equal() + \n xlab(\"False Positive Rate\") + \n ylab(\"True Positive Rate\") + \n theme(legend.position = c(.8, .2)) + \n scale_colour_manual(\n values = c(\"Natural Units\" = \"grey\", \n \"Inverse Units\" = \"black\")) + \n geom_abline(intercept = 0, slope = 1, col = \"grey\", lty = 2) +\n theme_classic()\n\ntrans_roc_plot" - }, - { - "objectID": "posts/2022-08-21-cox-box-transformation/index.html#rtichoke-code", - "href": "posts/2022-08-21-cox-box-transformation/index.html#rtichoke-code", - "title": "Box-Cox transformation from Feature Engineering by Max Kuhn and Kjell Johnson", - "section": "rtichoke code", - "text": "rtichoke code\n\nCreating ROC Curve with rtichoke\n\nBy Probability ThresholdBy Percent Positives Conditional Rate\n\n\n\n\n\nlibrary(rtichoke)\n\ncreate_roc_curve(\n probs = list(\n \"Natural Units\" = original_probs,\n \"Inverse Units\" = transformed_probs\n ),\n reals = list(\n outcomes\n ),\n size = 350, \n col_values = c(\"grey\", \"black\")\n) \n\n\n\n\n\n\n\n\n\n\n\n\n\n\nlibrary(rtichoke)\n\ncreate_roc_curve(\n probs = list(\n \"Natural Units\" = original_probs,\n \"Inverse Units\" = transformed_probs\n ),\n reals = list(\n outcomes\n ),\n stratified_by = \"ppcr\",\n size = 350, \n col_values = c(\"grey\", \"black\")\n)" + "text": "Replications\n\n\nDecision\n\n\nEmily Vertosick\n\n\nAndrew Vickers\n\n\ngt\n\n\ngtsummary\n\n\ndcurves\n\n\nrms\n\n\nHmisc\n\n\n\n\n\n\n\n\n\n\n\nOct 2, 2022\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\nReplications\n\n\nROC\n\n\nMax Kuhn\n\n\nKjell Johnson\n\n\nFeature Engineering by Max Kuhn and Kjell Johnson\n\n\ncaret\n\n\n\n\n\n\n\n\n\n\n\nAug 21, 2022\n\n\n\n\n\n\nNo matching items" } ] \ No newline at end of file diff --git a/_site/site_libs/bootstrap/static/fonts/Fraunces9pt-Black.woff2 b/_site/site_libs/bootstrap/static/fonts/Fraunces9pt-Black.woff2 deleted file mode 100644 index 9354718..0000000 Binary files a/_site/site_libs/bootstrap/static/fonts/Fraunces9pt-Black.woff2 and /dev/null differ diff --git a/_site/site_libs/bootstrap/static/fonts/Fraunces9pt-Regular.woff2 b/_site/site_libs/bootstrap/static/fonts/Fraunces9pt-Regular.woff2 deleted file mode 100644 index ece824c..0000000 Binary files a/_site/site_libs/bootstrap/static/fonts/Fraunces9pt-Regular.woff2 and /dev/null differ diff --git a/_site/site_libs/bootstrap/static/fonts/fraunces-v23-latin-regular.woff b/_site/site_libs/bootstrap/static/fonts/fraunces-v23-latin-regular.woff deleted file mode 100644 index 08577ef..0000000 Binary files a/_site/site_libs/bootstrap/static/fonts/fraunces-v23-latin-regular.woff and /dev/null differ diff --git a/_site/site_libs/bootstrap/static/fonts/fraunces-v23-latin-regular.woff2 b/_site/site_libs/bootstrap/static/fonts/fraunces-v23-latin-regular.woff2 deleted file mode 100644 index ff08287..0000000 Binary files a/_site/site_libs/bootstrap/static/fonts/fraunces-v23-latin-regular.woff2 and /dev/null differ diff --git a/_site/styles.css b/_site/styles.css deleted file mode 100644 index 2ddf50c..0000000 --- a/_site/styles.css +++ /dev/null @@ -1 +0,0 @@ -/* css styles */ diff --git a/_site/theme.scss b/_site/theme.scss deleted file mode 100644 index 631658d..0000000 --- a/_site/theme.scss +++ /dev/null @@ -1,5 +0,0 @@ -/*-- scss:defaults --*/ -$primary: #fff7f5 !default; -$font-family-base: 'Roboto Regular', 'Source Sans Pro', 'Lato', 'Merriweather', 'Cabin Regular' !default; -$body-color: $gray-700 !default; -$headings-font-family: 'Prata', 'Roboto', 'Playfair Display', 'Montserrat' !default; \ No newline at end of file diff --git a/about.qmd b/about.qmd deleted file mode 100644 index a563014..0000000 --- a/about.qmd +++ /dev/null @@ -1,18 +0,0 @@ ---- -title: "About" -image: profile.jpg -about: - template: jolla - links: - - icon: twitter - text: Twitter - href: https://twitter.com - - icon: linkedin - text: LinkedIn - href: https://linkedin.com - - icon: github - text: Github - href: https://github.com ---- - -About this blog diff --git a/about_me.qmd b/about_me.qmd index 7d366ad..46b61f8 100644 --- a/about_me.qmd +++ b/about_me.qmd @@ -16,24 +16,8 @@ about: - icon: linkedin href: https://www.linkedin.com/in/uriah-finkel text: Linkedin -format: - html: - resources: - - audio.mp4 --- -
- -
How to Say My Name
- - - -
- Author of [rtichoke](https://uriahf.github.io/rtichoke/) Data Scientist in [Clalit Innovation](https://www.clalit-innovation.org/) diff --git a/audio.mp4 b/audio.mp4 deleted file mode 100644 index c5295c4..0000000 Binary files a/audio.mp4 and /dev/null differ diff --git a/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.qmd b/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.qmd index 3ac850e..29bdc13 100644 --- a/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.qmd +++ b/posts/2022-10-02-dca-for-quantifying-the-additional-benefit-of-a-new-marker-by-emily-vertosick-and-andrew-vickers/index.qmd @@ -12,7 +12,7 @@ categories: - rms - Hmisc image: "image.jpg" -draft: true +draft: false --- ```{r} @@ -53,6 +53,8 @@ acath_pred <- post_pred %>% enframe(name = NULL, value = "post") ) +library(dcurves) + # decision curve dca_prepost <- dca( @@ -67,7 +69,7 @@ dca_prepost_smooth <- plot(smooth = TRUE) dca_prepost_15_35 <- - dcurves::dca( + dca( sigdz ~ pre + post, data = acath_pred, thresholds = seq(0.15, 0.35, by = 0.05), @@ -79,14 +81,10 @@ dca_prepost_15_35 <- show_ggplot_code = FALSE) # net interventions avoided -dca_prepost_netint_ob <- dca_prepost %>% - dcurves::net_intervention_avoided() -dca_prepost_netint <- dca_prepost_netint_ob %>% - plot(x, - type = 'net_intervention_avoided', - smooth = FALSE, - show_ggplot_code = FALSE) +dca_prepost_netint <- dca_prepost %>% + net_intervention_avoided() %>% + plot(type = 'net_intervention_avoided') # estimates table df_dca_tbl <- @@ -302,7 +300,7 @@ performance_data_dc %>% c("#00BFC4", "#C77CFF"), size = 350 ) %>% - layout( + plotly::layout( yaxis = list( range = c(-0.07, 0.7) @@ -330,7 +328,7 @@ conventional_dc <- performance_data_dc %>% col_values = c("#00BFC4", "#C77CFF"), size = 350 ) %>% - layout( + plotly::layout( yaxis = list(range = c(-0.07, 0.7)) ) @@ -409,7 +407,7 @@ dca_prepost_15_35 + #| warning: false performance_data_dc %>% - rtichoke::plot_decision_curve( + plot_decision_curve( col_values = c("#00BFC4", "#C77CFF"), min_p_threshold = 0.15, max_p_threshold = 0.35, @@ -454,7 +452,13 @@ performance_data_dc %>% #| eval: false #| warning: false -# code +dca_prepost %>% + net_intervention_avoided() %>% + plot(type = 'net_intervention_avoided', + smooth = FALSE) + + theme_classic() + + theme(legend.position = "none") + ``` ![](./interventions_avoided.svg) @@ -469,7 +473,7 @@ performance_data_dc %>% #| warning: false performance_data_dc %>% - rtichoke::plot_decision_curve( + plot_decision_curve( col_values = c("#F8766D", "#00BFC4"), type = "interventions avoided", size = 350