diff --git a/NAMESPACE b/NAMESPACE
index f46e478..0845658 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,10 +4,14 @@ export(estimateMetaI)
export(fitConf)
export(fitConfModels)
export(fitMetaDprime)
+export(plotConfModelFit)
export(simConf)
+import(ggplot2)
import(parallel)
+importFrom(Rmisc,summarySEwithin)
importFrom(plyr,ddply)
importFrom(plyr,mdply)
+importFrom(plyr,summarise)
importFrom(stats,dnorm)
importFrom(stats,integrate)
importFrom(stats,optim)
diff --git a/R/plotConfModelFit.R b/R/plotConfModelFit.R
index 4e46969..fb0f95c 100644
--- a/R/plotConfModelFit.R
+++ b/R/plotConfModelFit.R
@@ -32,7 +32,7 @@ #' one column for each estimated model parameter (parameters
#' not present in a specific model are filled with NAs)
#'
-#' #' @examples
+#' @examples
#' # 1. Select two subjects from the masked orientation discrimination experiment
#' data <- subset(MaskOri, participant %in% c(1:2))
#' head(data)
@@ -58,7 +58,7 @@ #' myPlottedFit
#' }
#' @import ggplot2
-#' @importFrom plyr ddply transform summarise
+#' @importFrom plyr ddply summarise
#' @importFrom Rmisc summarySEwithin
#'
#' @export
@@ -90,17 +90,29 @@ plotConfModelFit <- function(data, fitted_pars, model = NULL){
  }
  if(!all(data$correct %in% c(0,1))) stop("correct should be 1 or 0")
-  myColor <- switch(model, 'GN' = 1, 'IG' = 2, 'ITGc' = 3, 'ITGcm' = 4, 'logN' = 5,
-                    'logWEV' = 6,'PDA' = 7, 'WEV' = 8, 'SDT' = 9) # models are color coded
+  PlotName <-
+    switch(model,
+           'GN' = "Gaussian noise model",
+           'IG' = "Independent Gaussian model",
+           'ITGc' = "Independent truncated Gaussian model: HMetad-Version",
+           'ITGcm' = "Independent truncated Gaussian model: Meta-d'-Version",
+           'logN' = "Logistic noise model",
+           'logWEV' = "Logistic weighted evidence and visibility model",
+           'PDA' = "Post-decisional accumulation model",
+           'WEV' = "Weighted evidence and visibility model",
+           'SDT' = "Signal detection rating model") # full model name used as the plot title
  # 1. First aggregate on the level of subjects
  AggDist <- plyr::ddply(data,
-                         ~ diffCond * rating * stimulus * correct * participant, #,
-                         plyr::summarise, p = length(rating), .drop=FALSE)
+                         ~ diffCond * rating *
+                           stimulus * correct * participant, #,
+                         plyr::summarise,
+                         p = length(rating), .drop=FALSE)
-  AggDist <- plyr::ddply(AggDist, ~ diffCond * stimulus,
+  AggDist <- plyr::ddply(AggDist, ~
+                           diffCond * stimulus * participant,
                         transform, N = sum(p))
  AggDist$p <- AggDist$p / AggDist$N
@@ -119,8 +131,10 @@ plotConfModelFit <- function(data, fitted_pars, model = NULL){
                                     na.rm = TRUE, .drop = TRUE)
  AggDist$rating <- as.numeric(AggDist$rating)
  levels(AggDist$stimulus) <- c("S = -1", "S = 1")
-  AggDist$diffCond <- factor(as.numeric(AggDist$diffCond)) # diffCond should code the order of difficulty levels
-  levels(AggDist$diffCond) <- paste("K =", as.numeric(levels(AggDist$diffCond)))
+  AggDist$diffCond <-
+    factor(as.numeric(AggDist$diffCond)) # diffCond should code the order of difficulty levels
+  levels(AggDist$diffCond) <-
+    paste("K =", as.numeric(levels(AggDist$diffCond)))
  # 4) create the prediction from model fit
@@ -160,6 +174,7 @@ plotConfModelFit <- function(data, fitted_pars, model = NULL){
    xlab("Confidence rating") + ylab("probability") +
    ylim(c(0,1))+
+    ggtitle(PlotName) +
    theme(strip.text.y = element_text(angle=0)) +
    theme_minimal()
diff --git a/README.rmd b/README.rmd
index 19ce851..b2f156a 100644
--- a/README.rmd
+++ b/README.rmd
@@ -157,9 +157,7 @@ features in the confidence judgment.
The parameters $w$ and $\sigma$ are free parameters.
The conceptual idea of meta-d′ is to quantify metacognition in terms of sensitivity in a hypothetical signal detection rating model describing the primary task, under the assumption that participants had perfect access to the sensory evidence
-and were perfectly consistent in placing their confidence criteria (Maniscalco & Lau, 2012, 2014). Using a signal detection model describing the primary task to quantify metacognition
-allows a direct comparison between metacognitive accuracy and discrimination performance
-because both are measured on the same scale. Meta-d′ can be compared against the estimate of the distance between the two stimulus distributions estimated from discrimination responses, which is referred to as d′: If meta-d′ equals d′, it means that metacognitive accuracy is exactly as good as expected from discrimination performance. If meta-d′ is lower than d′, it means that metacognitive accuracy is not optimal. It can be shown that the implicit model of confidence underlying the meta-d'/d' method is identical to different versions of the independent truncated Gaussian model (Rausch et al., 2023), depending on whether the original model specification by Maniscalco and Lau (2012) or alternatively the specification by Fleming (2017) is used. We strongly recommend to test whether the independent truncated Gaussian models are adequate descriptions of the data before quantifying metacognitive efficiency with meta-d′/d′.
+and were perfectly consistent in placing their confidence criteria (Maniscalco & Lau, 2012, 2014). Using a signal detection model describing the primary task to quantify metacognition allows a direct comparison between metacognitive accuracy and discrimination performance because both are measured on the same scale. Meta-d′ can be compared against the estimate of the distance between the two stimulus distributions estimated from discrimination responses, which is referred to as d′: If meta-d′ equals d′, it means that metacognitive accuracy is exactly as good as expected from discrimination performance. If meta-d′ is lower than d′, it means that metacognitive accuracy is not optimal. It can be shown that the implicit model of confidence underlying the meta-d'/d' method is identical to different versions of the independent truncated Gaussian model (Rausch et al., 2023), depending on whether the original model specification by Maniscalco and Lau (2012) or alternatively the specification by Fleming (2017) is used. We strongly recommend testing whether the independent truncated Gaussian models are adequate descriptions of the data before quantifying metacognitive efficiency with meta-d′/d′.

### Information-theoretic measures of metacognition
@@ -182,9 +180,7 @@ $$meta-I_{2}^{r} = meta-I / H(Y = \hat{Y})$$
Notably, Dayan (2023) pointed out that a liberal or conservative use of the confidence levels will affected the mutual information and thus all information-theoretic measures of metacognition.

In addition to Dayan's measures, Meyen et al. (submitted) suggested an
-additional measure that normalizes the Meta-I by the range of possible values
-it can take. This required deriving lower and upper bounds of the transmitted
-information given a participant's accuracy.
+additional measure that normalizes the Meta-I by the range of possible values it can take. This required deriving lower and upper bounds of the transmitted information given a participant's accuracy.
$$RMI = \frac{meta-I}{\max_{\text{accuracy}}\{meta-I\}}$$

@@ -192,8 +188,7 @@
As these measures are prone to estimation bias, the package offers a simple
bias reduction mechanism in which the observed frequencies of stimulus-response
combinations are taken as the underlying probability distribution. From this,
Monte-Carlo simulations are conducted to estimate
-and subtract the bias from these measures. Note that there is probably no way
-to remove this bias completely.
+and subtract the bias from these measures. Note that there is probably no way to remove this bias completely.

## Installation
@@ -262,60 +257,23 @@ It can be seen that the independent truncated Gaussian model is consistently out

### Visualization

-After obtaining model fits, it is strongly recommended to visualize the prediction implied by the best fitting sets of parameters and to compare the prediction with the actual data (Palminteri et al., 2017). The best way to visualize the data is highly specific to the data set and research question, which is why `statConfR` does not come with its own visualization tools. This being said, here is an example for how a visualization of model fit could look like:
+After obtaining the model fits, it is strongly recommended to visualize the predictions implied by the best-fitting set of parameters and to compare the predictions with the actual data (Palminteri et al., 2017). The `statConfR` package provides the function `plotConfModelFit`, which plots the empirically observed distribution of responses and confidence ratings (on the x-axis) as bars, as a function of discriminability (in the rows) and stimulus (in the columns). Superimposed on the empirical data, the plot also shows the prediction of a selected model as dots. The fitted model parameters are passed to `plotConfModelFit` via the argument `fitted_pars`.

```{r, echo=TRUE, results="hide", message=FALSE, warning=FALSE}
-library(tidyverse)
-AggregatedData <- MaskOri %>%
-  mutate(ratings = as.numeric(rating), diffCond = as.numeric(diffCond)) %>%
-  group_by(participant, diffCond, correct ) %>%
-  dplyr::summarise(ratings=mean(ratings,na.rm=T)) %>%
-  Rmisc::summarySEwithin(measurevar = "ratings",
-                         withinvars = c("diffCond", "correct"),
-                         idvar = "participant",
-                         na.rm = TRUE, .drop = TRUE) %>%
-  mutate(diffCond = as.numeric(diffCond))
-AggregatedPrediction <-
-  rbind(fitted_pars %>%
-          filter(model=="ITGcm") %>%
-          group_by(participant) %>%
-          simConf(model="ITGcm") %>%
-          mutate(model="ITGcm"),
-        fitted_pars %>%
-          filter(model=="WEV") %>%
-          group_by(participant) %>%
-          simConf(model="WEV") %>%
-          mutate(model="WEV")) %>%
-  mutate(ratings = as.numeric(rating) ) %>%
-  group_by(participant, diffCond, correct, model ) %>%
-  dplyr::summarise(ratings=mean(ratings,na.rm=T)) %>%
-  Rmisc::summarySEwithin(measurevar = "ratings",
-                         withinvars = c("diffCond", "correct", "model"),
-                         idvar = "participant",
-                         na.rm = TRUE, .drop = TRUE) %>%
-  mutate(diffCond = as.numeric(diffCond))
-PlotMeans <-
-  ggplot(AggregatedPrediction,
-         aes(x = diffCond, y = ratings, color = correct)) + facet_grid(~ model) +
-  ylim(c(1,5)) +
-  geom_line() + ylab("confidence rating") + xlab("difficulty condition") +
-  scale_color_manual(values = c("darkorange", "navy"),
-                     labels = c("Error", "Correct response"), name = "model prediction") +
-  geom_errorbar(data = AggregatedData,
-                aes(ymin = ratings-se, ymax = ratings+se), color="black") +
-  geom_point(data = AggregatedData, aes(shape=correct), color="black") +
-  scale_shape_manual(values = c(15, 16),
-                     labels = c("Error", "Correct response"), name = "observed data") +
-  theme_bw()
+PlotFitWEV <- plotConfModelFit(MaskOri, fitted_pars, model="WEV")
+PlotFitITGcm <- plotConfModelFit(MaskOri, fitted_pars, model="ITGcm")
```

-```{r, echo=TRUE, fig.cap = "Predicted vs. observed confidence as a function of discriminability and correctness"}
-PlotMeans
+```{r, echo=TRUE, fig.cap = "Observed distribution of accuracy and responses as a function of discriminability and stimulus vs. prediction by the weighted evidence and visibility model"}
+PlotFitWEV
```

+```{r, echo=TRUE, fig.cap = "Observed distribution of accuracy and responses as a function of discriminability and stimulus vs. prediction by the independent truncated Gaussian model: Meta-d'-Version (ITGcm)"}
+PlotFitITGcm
+```

### Measuring metacognition
diff --git a/TestResults.RData b/TestResults.RData
index 4ded2f5..68fbdc1 100644
Binary files a/TestResults.RData and b/TestResults.RData differ
diff --git a/TestScript.R b/TestScript.R
index f6b8486..2fa2858 100644
--- a/TestScript.R
+++ b/TestScript.R
@@ -371,7 +371,8 @@ merge(MetaDs %>% select(participant, Ratio),
# 5) Plotting fits
-PlotFitSDT <- plotConfModelFit(MaskOri, fitted_pars, model="SDT")
+PlotFitSDT <- plotConfModelFit(data=MaskOri, fitted_pars=fitted_pars, model="SDT")
+
PlotFitGN <- plotConfModelFit(MaskOri, fitted_pars, model="GN")
PlotFitLogN <- plotConfModelFit(MaskOri, fitted_pars, model="logN")
PlotFitWEV <- plotConfModelFit(MaskOri, fitted_pars, model="WEV")
@@ -381,6 +382,8 @@ PlotFitITGc <- plotConfModelFit(MaskOri, fitted_pars, model="ITGc")
PlotFitIG <- plotConfModelFit(MaskOri, fitted_pars, model="IG")
PlotFitPDA <- plotConfModelFit(MaskOri, fitted_pars, model="PDA")
+test <- group_BMS_fits(fitted_pars)
+group_BMS(fitted_pars)
save(fitted_pars, PlotFitsBICWeights,
     recov_pars_SDT, Plot_recov_SDT,
diff --git a/man/fitConfModels.Rd b/man/fitConfModels.Rd
index aebc65f..6d38185 100644
--- a/man/fitConfModels.Rd
+++ b/man/fitConfModels.Rd
@@ -44,11 +44,10 @@ Defaults to 4.}
number of cores -1 will be used.}
}
\value{
-Gives data frame with one row for each combination of model and
-participant. Columns include a model and participant column,
-one column for each estimated parameter for the different models (parameters
-that are not present in a specific model (row) but in other models are
-filled with NAs.
+Gives a \code{data.frame} with one row for each combination of model and
+participant. There are different columns for the model, the participant ID, and
+one column for each estimated model parameter (parameters
+not present in a specific model are filled with NAs).
Additional information about the fit is provided in additional columns:
\itemize{
 \item \code{negLogLik} (negative log-likelihood of the best-fitting set of parameters),
diff --git a/man/plotConfModelFit.Rd b/man/plotConfModelFit.Rd
new file mode 100644
index 0000000..ce66d6d
--- /dev/null
+++ b/man/plotConfModelFit.Rd
@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plotConfModelFit.R
+\name{plotConfModelFit}
+\alias{plotConfModelFit}
+\title{Plot the prediction of fitted parameters of one model of confidence over the corresponding real data}
+\usage{
+plotConfModelFit(data, fitted_pars, model = NULL)
+}
+\arguments{
+\item{data}{a \code{data.frame} where each row is one trial, containing the following
+variables:
+\itemize{
+\item \code{diffCond} (optional; different levels of discriminability,
+should be a factor with levels ordered from hardest to easiest),
+\item \code{rating} (discrete confidence judgments, should be a factor with levels
+ordered from lowest confidence to highest confidence;
+otherwise will be transformed to factor with a warning),
+\item \code{stimulus} (stimulus category in a binary choice task,
+should be a factor with two levels, otherwise it will be transformed to
+a factor with a warning),
+\item \code{correct} (encoding whether the response was correct; should be 0 for
+incorrect responses and 1 for correct responses),
+\item \code{participant} (some group ID, most often a participant identifier;
+the models given in the second argument are fitted to each subset of \code{data}
+determined by the different values of this column)
+}}
+
+\item{fitted_pars}{a \code{data.frame} with one row for each participant and model parameters in different columns.
+\code{fitted_pars} may also contain a column called \code{model} specifying the model to be visualized.
+If there is no model column in \code{fitted_pars} or if there are multiple models in \code{fitted_pars},
+it is necessary to specify the model argument.}
+
+\item{model}{\code{character}. See \code{\link{fitConf}} for all available models}
+}
+\value{
+a \code{data.frame} with one row for each combination of model and
+participant. There are different columns for the model, the participant ID, and
+one column for each estimated model parameter (parameters
+not present in a specific model are filled with NAs)
+}
+\examples{
+# 1. Select two subjects from the masked orientation discrimination experiment
+data <- subset(MaskOri, participant \%in\% c(1:2))
+head(data)
+
+# 2. Fit some models to each subject of the masked orientation discrimination experiment
+\donttest{
+ # Fitting several models to several subjects takes quite some time
+ # (about 10 minutes per model fit per participant on a 2.8GHz processor
+ # with the default values of nInits and nRestart).
+ # If you want to fit more than just two subjects,
+ # we strongly recommend setting .parallel=TRUE
+ Fits <- fitConfModels(data, models = "ITGc", .parallel = FALSE)
+}
+
+# 3. Plot the predicted probabilities based on model and fitted parameters over the observed relative frequencies.
+\donttest{
+ # Fitting several models to several subjects takes quite some time
+ # (about 10 minutes per model fit per participant on a 2.8GHz processor
+ # with the default values of nInits and nRestart).
+ # If you want to fit more than just two subjects,
+ # we strongly recommend setting .parallel=TRUE
+ myPlottedFit <- plotConfModelFit(data, Fits)
+ myPlottedFit
+}
+}
+\description{
+The \code{plotConfModelFit} function plots the predicted distribution of discrimination responses
+and confidence ratings created from a \code{data.frame} of parameters obtained from \code{\link{fitConfModels}}
+and overlays the predicted distribution over the data to which the model parameters were fitted.
+}
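For quick reference, a minimal end-to-end sketch of the workflow documented above; it simply mirrors the roxygen examples, and the two-subject subset and the ITGc model are illustrative choices rather than part of the patch (fitting can take several minutes per subject):

```r
library(statConfR)

# 1. Two example subjects from the masked orientation discrimination data set
data <- subset(MaskOri, participant %in% c(1:2))

# 2. Fit one confidence model per subject (slow; set .parallel = TRUE for more subjects)
Fits <- fitConfModels(data, models = "ITGc", .parallel = FALSE)

# 3. Overlay the model prediction (dots) on the observed distribution of
#    responses and confidence ratings (bars)
plotConfModelFit(data, Fits, model = "ITGc")
```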