diff --git a/notebooks/demo_haploid.ipynb b/notebooks/demo_haploid.ipynb index aacc13c..90ff1cd 100644 --- a/notebooks/demo_haploid.ipynb +++ b/notebooks/demo_haploid.ipynb @@ -24,7 +24,7 @@ "metadata": {}, "outputs": [], "source": [ - "# First, let's simulate some haplotypes to use as a reference panel.\n", + "# Simulate some haplotypes to use as a reference panel.\n", "seed = 1208\n", "ts = msprime.sim_mutations(\n", " msprime.sim_ancestry(\n", @@ -52,10 +52,7 @@ "\n", "# A query must be of size (1, number of sites).\n", "# It can contain MISSING values, but not NONCOPY values.\n", - "query = ref_panel[:, 0].reshape(1, ts.num_sites)\n", - "\n", - "# The number of distinct alleles per site is needed to get per-site mutation rates.\n", - "num_alleles = lshmm.core.get_num_alleles(ref_panel, query)\n" + "query = ref_panel[:, 0].reshape(1, ts.num_sites)\n" ] }, { @@ -64,10 +61,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Per-site recombination and mutation probabilities are needed\n", - "# to parametrise the LS HMM.\n", + "# Set per-site recombination and mutation probabilities.\n", "rho = np.zeros(ts.num_sites, dtype=np.float64) + 1e-4\n", "rho[0] = 0\n", + "\n", "mu = np.zeros(ts.num_sites, dtype=np.float64) + 1e-4\n" ] }, @@ -84,11 +81,11 @@ "metadata": {}, "outputs": [], "source": [ - "# Let's determine the best copying path and its log-likelihood using the Viterbi algorithm.\n", + "# Infer the best copying path using the Viterbi algorithm.\n", "path, log_lik = lshmm.api.viterbi(\n", " ref_panel,\n", " query,\n", - " num_alleles,\n", + " ploidy=1,\n", " prob_recombination=rho,\n", " prob_mutation=mu,\n", " scale_mutation_rate=True,\n", @@ -103,14 +100,13 @@ "metadata": {}, "outputs": [], "source": [ - "# There is a function to evaluate the log-likelihood of a copying path.\n", - "# Let's check that we get the same log-likelihood of the same copying path \n", - "# under the same parameters using the function.\n", + "# Evaluate the log-likelihood of the best copying path.\n", + "# We should get the same log-likelihood.\n", "log_lik_2 = lshmm.api.path_loglik(\n", " ref_panel,\n", " query,\n", - " num_alleles,\n", - " path,\n", + " ploidy=1,\n", + " path=path,\n", " prob_recombination=rho,\n", " prob_mutation=mu,\n", " scale_mutation_rate=True,\n", @@ -124,13 +120,13 @@ "metadata": {}, "outputs": [], "source": [ - "# We should get a different log-likelihood value if the parameters are not the same.\n", - "# Let's check the log-likelihood of the same copying path under different parameters.\n", + "# We should get a different log-likelihood value if the parameters are different.\n", + "# Check the log-likelihood of the same copying path under different parameters.\n", "log_lik_3 = lshmm.api.path_loglik(\n", " ref_panel,\n", " query,\n", - " num_alleles,\n", - " path,\n", + " ploidy=1,\n", + " path=path,\n", " prob_recombination=rho,\n", " prob_mutation=mu + 1e-5, # Increase\n", " scale_mutation_rate=True,\n", @@ -151,13 +147,12 @@ "metadata": {}, "outputs": [], "source": [ - "# To calculate the forward probability matrix of a query,\n", - "# let's use the forward algorithm.\n", + "# Calculate the forward probability matrix of a query,\n", "np.set_printoptions(linewidth=100, precision=4)\n", "fwd_mat, norm_factor, log_lik = lshmm.api.forwards(\n", " ref_panel,\n", " query,\n", - " num_alleles=num_alleles,\n", + " ploidy=1,\n", " prob_recombination=rho,\n", " prob_mutation=mu,\n", " scale_mutation_rate=True,\n", @@ -172,13 +167,12 @@ "metadata": {}, "outputs": [], "source": [ - "# Now, to calculate the backward probability matrix of the same query,\n", - "# let's use the backward algorithm.\n", + "# Calculate the backward probability matrix of the same query.\n", "np.set_printoptions(linewidth=100, precision=4)\n", "bwd_mat = lshmm.api.backwards(\n", " ref_panel,\n", " query,\n", - " num_alleles,\n", + " ploidy=1,\n", " normalisation_factor_from_forward=norm_factor,\n", " prob_recombination=rho,\n", " prob_mutation=mu,\n",