diff --git a/Additional_src/Modified_hifiasm/.github/workflows/ci.yaml b/Additional_src/Modified_hifiasm/.github/workflows/ci.yaml deleted file mode 100644 index 3bbd88d..0000000 --- a/Additional_src/Modified_hifiasm/.github/workflows/ci.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: CI - -on: - push: - branches: - - master - pull_request: - -jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - compiler: [gcc] - - steps: - - name: Checkout minimap2 - uses: actions/checkout@v2 - - - name: Compile with ${{ matrix.compiler }} - run: make CC=${{ matrix.compiler }} diff --git a/Additional_src/Modified_hifiasm/CommandLines.h b/Additional_src/Modified_hifiasm/CommandLines.h index f8c91cc..2c56e16 100644 --- a/Additional_src/Modified_hifiasm/CommandLines.h +++ b/Additional_src/Modified_hifiasm/CommandLines.h @@ -5,7 +5,7 @@ #include #include -#define HA_VERSION "0.19.3-r572" +#define HA_VERSION "0.19.5-r587" #define VERBOSE 0 diff --git a/Additional_src/Modified_hifiasm/Overlaps.cpp b/Additional_src/Modified_hifiasm/Overlaps.cpp index 038b495..00ff448 100644 --- a/Additional_src/Modified_hifiasm/Overlaps.cpp +++ b/Additional_src/Modified_hifiasm/Overlaps.cpp @@ -59,12 +59,21 @@ KRADIX_SORT_INIT(u_trans_ts, u_trans_t, u_trans_ts_key, member_size(u_trans_t, t KRADIX_SORT_INIT(ha_mzl_t_srt1, ha_mzl_t, ha_mzl_t_key, member_size(ha_mzl_t, x)) #define UL_COV_THRES 2 +#define PHASE_SEP 64 +#define PHASE_SEF 2 +#define PHASE_SEP_RATE 0.04 +#define PHASE_MISS_LEN 1000000 +#define PHASE_MISS_N 8 KSORT_INIT_GENERIC(uint32_t) void reduce_hamming_error_adv(ma_ug_t *iug, asg_t *sg, ma_hit_t_alloc* sources, ma_sub_t *coverage_cut, int max_hang, int min_ovlp, long long gap_fuzz, R_to_U *ru, bubble_type* bub); void print_vw_edge(asg_t *sg, uint32_t vid, uint32_t wid, const char *cmd); +void output_trio_graph_joint(asg_t *sg, ma_sub_t* coverage_cut, char* output_file_name, +ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, long long tipsLen, float tip_drop_ratio, +long long 
stops_threshold, R_to_U* ruIndex, float chimeric_rate, float drop_ratio, int max_hang, +int min_ovlp, long long gap_fuzz, bub_label_t* b_mask_t, ma_ug_t **rhu0, ma_ug_t **rhu1); typedef struct { uint32_t d, tot, ma, p; @@ -99,12 +108,45 @@ typedef struct { asg64_v *rr; } rd_hamming_t; +typedef struct { + asg_t *ref; + asg_t *nsg; + ma_ug_t *nug; + uint32_t *o2n; + uint64_t *ugh; + asg64_v *srt; +} rd_hamming_fly_t; + +typedef struct { + // asg_t *ref; + // asg_t *ng; + ma_hit_t_alloc* src; + ma_sub_t *cov; + int32_t max_hang; + int32_t min_ovlp; + int32_t gap_fuzz; + asg32_v *srt; + uint8_t *vs; + // uint32_t *rs; + ma_ug_t *fg; + kvec_asg_arc_t_warp *ae; + uint32_t n_insert; +} rd_hamming_fly_simp_t; + +typedef struct { + ma_ug_t *ug; + asg_t *rg; + uint32_t *ridx; + uint64_t *ra; + uint64_t ridx_n, ra_n; +} dedup_idx_t; + ///this value has been updated at the first line of build_string_graph_without_clean long long min_thres; uint32_t print_untig_by_read(ma_ug_t *g, const char* name, uint32_t in, ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, const char* info); -int asg_pop_bubble_primary_trio(ma_ug_t *ug, uint64_t* i_max_dist, uint32_t positive_flag, uint32_t negative_flag, hap_cov_t *cov, utg_trans_t *o, uint32_t is_update_chain); +int asg_pop_bubble_primary_trio(ma_ug_t *ug, uint64_t* i_max_dist, uint32_t positive_flag, uint32_t negative_flag, hap_cov_t *cov, utg_trans_t *o, uint32_t is_update_chain, rd_hamming_fly_simp_t *p); kv_u_trans_t *get_utg_ovlp(ma_ug_t **ug, asg_t* read_g, ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, ma_sub_t* coverage_cut, R_to_U* ruIndex, int max_hang, int min_ovlp, kvec_asg_arc_t_warp* new_rtg_edges, bub_label_t* b_mask_t, uint8_t* r_het); void delete_useless_nodes(ma_ug_t **ug); @@ -8615,6 +8657,236 @@ ma_ug_t *ma_ug_gen(asg_t *g) return ug; } +ma_ug_t *ma_ug_gen_phase(asg_t *g, uint32_t min_occ, double cutoff) +{ + // fprintf(stderr, "\n-0-[M::%s] min_occ::%u, cutoff::%f\n", __func__, min_occ, cutoff); + 
asg_cleanup(g); + int32_t *mark; + uint32_t i, v, n_vtx = g->n_seq * 2, fn, mn, fn0, mn0, fn1, mn1, n1, cn, k, st, pt, ct, sz, ez; + uint64_t z; + ///is a queue + kdq_t(uint64_t) *q; + asg64_v uidx; kv_init(uidx); + + ma_ug_t *ug; + + ug = (ma_ug_t*)calloc(1, sizeof(ma_ug_t)); + ug->g = asg_init(); + ///each node has two directions + mark = (int32_t*)calloc(n_vtx, 4); + + q = kdq_init(uint64_t); + for (v = 0; v < n_vtx; ++v) { + uint32_t w, x, l, start, end, len; + ma_utg_t *p; + if (g->seq[v>>1].del || mark[v]) continue; + if (arc_cnt(g, v) == 0 && arc_cnt(g, (v^1)) != 0) continue; + mark[v] = 1; + q->count = 0, start = v, end = v^1, len = 0; fn = mn = 0; + // forward + w = v; + while (1) { + /** + * w----->x + * w<-----x + * that means the only suffix of w is x, and the only prefix of x is w + **/ + if (arc_cnt(g, w) != 1) break; + x = arc_first(g, w).v; // w->x + if (arc_cnt(g, x^1) != 1) break; + /** + * another direction of w would be marked as used (since w has been used) + **/ + mark[x] = mark[w^1] = 1; + ///l is the edge length, instead of overlap length + ///note: edge length is different with overlap length + l = asg_arc_len(arc_first(g, w)); + kdq_push(uint64_t, q, (uint64_t)w<<32 | l); + if(R_INF.trio_flag[w>>1] == FATHER) fn++; + if(R_INF.trio_flag[w>>1] == MOTHER) mn++; + end = x^1, len += l; + w = x; + if (x == v) break; + } + if (start != (end^1) || kdq_size(q) == 0) { // linear unitig + ///length of seq, instead of edge + l = g->seq[end>>1].len; + kdq_push(uint64_t, q, (uint64_t)(end^1)<<32 | l); + if(R_INF.trio_flag[end>>1] == FATHER) fn++; + if(R_INF.trio_flag[end>>1] == MOTHER) mn++; + len += l; + } else { // circular unitig + start = end = UINT32_MAX; + goto add_unitig; // then it is not necessary to do the backward + } + // backward + x = v; + while (1) { // similar to forward but not the same + if (arc_cnt(g, x^1) != 1) break; + w = arc_first(g, x^1).v ^ 1; // w->x + if (arc_cnt(g, w) != 1) break; + mark[x] = mark[w^1] = 1; + l = 
asg_arc_len(arc_first(g, w)); + ///w is the seq id + direction, l is the length of edge + ///push element to the front of a queue + kdq_unshift(uint64_t, q, (uint64_t)w<<32 | l); + if(R_INF.trio_flag[w>>1] == FATHER) fn++; + if(R_INF.trio_flag[w>>1] == MOTHER) mn++; + // fprintf(stderr, "uId: %u, >%.*s (%u)\n", + // ug->u.n, (int)Get_NAME_LENGTH((R_INF), w>>1), Get_NAME((R_INF), w>>1), w>>1); + + start = w, len += l; + x = w; + } +add_unitig: + if (start != UINT32_MAX) mark[start] = mark[end] = 1; + // fprintf(stderr, "\n-0-[M::%s] fn::%u, mn::%u\n", __func__, fn, mn); + cn = MIN(fn, mn); + // if((cn > min_occ) && (cn > ((fn+mn)*cutoff))) + if((cn <= ((fn+mn)*cutoff)) || (cn <= min_occ)) { + kv_pushp(ma_utg_t, ug->u, &p); + p->s = 0, p->start = start, p->end = end, p->len = len, p->n = kdq_size(q), p->circ = (start == UINT32_MAX); + p->m = p->n; + kv_roundup32(p->m); + p->a = (uint64_t*)malloc(8 * p->m); + //all elements are saved here + for (i = 0; i < kdq_size(q); ++i) p->a[i] = kdq_at(q, i); + } else if(kdq_size(q)) { + ct = R_INF.trio_flag[kdq_at(q, 0)>>33]; + if((ct != FATHER) && (ct != MOTHER)) ct = AMBIGU; + pt = ct; + fn0 = fn; mn0 = mn; uidx.n = fn = mn = 0; + if(ct == FATHER) fn++; if(ct == MOTHER) mn++; + for (k = 1, l = 0; k <= kdq_size(q); k++) { + st = 0; ct = AMBIGU; + if(k == kdq_size(q)) { + st = 1; + } else { + ct = R_INF.trio_flag[kdq_at(q, k)>>33]; + if((ct != FATHER) && (ct != MOTHER)) ct = AMBIGU; + if((ct != AMBIGU) && (pt != AMBIGU) && (ct != pt)) { + st = 1; + } + } + if(st) { + // fprintf(stderr, "-1-[M::%s] l::%u, k::%u, kdq_size(q)::%u, fn::%u, mn::%u, ct::%u, pt::%u\n", + // __func__, l, k, (uint32_t)kdq_size(q), fn, mn, ct, pt); + if(k < kdq_size(q)) { + assert(fn || mn); assert((!fn) || (!mn)); + } + z = l<<1; z |= (((uint64_t)MAX(fn, mn))<<32); + if(mn) z |= 1; + kv_push(uint64_t, uidx, z); + fn = mn = 0; l = k; + } + if(ct != AMBIGU) pt = ct; + if(ct == FATHER) fn++; if(ct == MOTHER) mn++; + } + + + fn = mn = 0; fn1 = mn1 = n1 = 0; 
+ if(uidx.a[0]&1) mn += uidx.a[0]>>32; + else fn += uidx.a[0]>>32; + for (k = 1, l = 0; k <= uidx.n; k++) { + st = 0; + if(k == uidx.n) { + st = 1; + } else { + if(uidx.a[k]&1) mn += uidx.a[k]>>32; + else fn += uidx.a[k]>>32; + cn = MIN(fn, mn); + if((cn > min_occ) && (cn > ((fn+mn)*cutoff))) st = 1; + // fprintf(stderr, "-2-[M::%s] fn::%u, mn::%u, cn::%u, ((fn+mn)*cutoff)::%u, st::%u\n", + // __func__, fn, mn, cn, (uint32_t)(((fn+mn)*cutoff)), st); + } + if(st) { + // fprintf(stderr, "-3-[M::%s] fn::%u, mn::%u\n", __func__, fn, mn); + sz = ((uint32_t)uidx.a[l])>>1; + ez = ((k>1):(kdq_size(q))); + assert(ez > sz); n1 += ez - sz; + kv_pushp(ma_utg_t, ug->u, &p); + if ((start == UINT32_MAX) && (sz == 0) && (ez == kdq_size(q))) {///circle + p->s = 0, p->start = start, p->end = end, p->len = len, p->n = kdq_size(q), p->circ = (start == UINT32_MAX); + p->m = p->n; + kv_roundup32(p->m); + p->a = (uint64_t*)malloc(8 * p->m); + //all elements are saved here + for (i = 0; i < kdq_size(q); ++i) { + p->a[i] = kdq_at(q, i); + ct = R_INF.trio_flag[p->a[i]>>33]; + if((ct != FATHER) && (ct != MOTHER)) ct = AMBIGU; + if(ct == FATHER) fn1++; if(ct == MOTHER) mn1++; + } + } else { + p->s = 0; p->len = 0; p->circ = 0; + p->start = kdq_at(q, sz)>>32; + p->end = (kdq_at(q, (ez-1))>>32)^1; + p->m = p->n = ez - sz; kv_roundup32(p->m); + p->a = (uint64_t*)malloc(8 * p->m); + + for (i = sz, z = 0; i+1 < ez; i++, z++) { + p->a[z] = kdq_at(q, i); p->len += (uint32_t)p->a[z]; + ct = R_INF.trio_flag[p->a[z]>>33]; + if((ct != FATHER) && (ct != MOTHER)) ct = AMBIGU; + if(ct == FATHER) fn1++; if(ct == MOTHER) mn1++; + } + p->a[z] = kdq_at(q, i); p->a[z] >>= 32; p->a[z] <<= 32; + p->a[z] |= g->seq[p->a[z]>>33].len; p->len += (uint32_t)p->a[z]; + ct = R_INF.trio_flag[p->a[z]>>33]; + if((ct != FATHER) && (ct != MOTHER)) ct = AMBIGU; + if(ct == FATHER) fn1++; if(ct == MOTHER) mn1++; + } + fn = mn = 0; l = k; + if(k < uidx.n) { + if(uidx.a[k]&1) mn += uidx.a[k]>>32; + else fn += uidx.a[k]>>32; + } + } 
+ } + assert(n1 == kdq_size(q)); + assert(fn1 == fn0); assert(mn1 == mn0); + } + } + kdq_destroy(uint64_t, q); kv_destroy(uidx); + + // add arcs between unitigs; reusing mark for a different purpose + //ug saves all unitigs + for (v = 0; v < n_vtx; ++v) mark[v] = -1; + + //mark all start nodes and end nodes of all unitigs + for (i = 0; i < ug->u.n; ++i) { + if (ug->u.a[i].circ) continue; + mark[ug->u.a[i].start] = i<<1 | 0; + mark[ug->u.a[i].end] = i<<1 | 1; + } + + //scan all edges + for (i = 0; i < g->n_arc; ++i) { + asg_arc_t *p = &g->arc[i]; + if (p->del) continue; + ///to connect two unitigs, we need to connect the end of unitig x to the start of unitig y + ///so we need to ^1 to get the reverse direction of (x's end)? + ///>=0 means this node is a start/end node of an unitig + ///means this node is a intersaction node + if (mark[p->ul>>32^1] >= 0 && mark[p->v] >= 0) { + asg_arc_t *q; + uint32_t u = mark[p->ul>>32^1]^1; + int l = ug->u.a[u>>1].len - p->ol; + if (l < 0) l = 1; + q = asg_arc_pushp(ug->g); + q->ol = p->ol, q->del = 0; + q->ul = (uint64_t)u<<32 | l; + q->v = mark[p->v]; q->ou = 0; + q->el = p->el; + } + } + for (i = 0; i < ug->u.n; ++i) + asg_seq_set(ug->g, i, ug->u.a[i].len, 0); + asg_cleanup(ug->g); + free(mark); + return ug; +} + ma_ug_t *ma_ug_gen_primary(asg_t *g, uint8_t flag) { asg_cleanup(g); @@ -13330,7 +13602,7 @@ ma_hit_t_alloc* sources, R_to_U* ruIndex, int max_hang, int min_ovlp, kvec_asg_a FILE* output_file = fopen(gfa_name, "w"); ma_ug_print(ug, sg, coverage_cut, sources, ruIndex, "ptg", output_file); fclose(output_file); - + if(asm_opt.make_only_primary_contigs == 1) { fprintf(stderr, "Modified_hifiasm finishes because the user has provided the option \"--only-primary\""); @@ -13811,7 +14083,7 @@ void debug_hapS(uint32_t *hapS, uint32_t rn) } void output_poly_trio(asg_t *sg, ma_sub_t* coverage_cut, char* output_file_name, ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, long long tipsLen, float tip_drop_ratio, long long 
stops_threshold, -R_to_U* ruIndex, float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, int is_bench, +R_to_U* ruIndex, float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, int gap_fuzz, int is_bench, bub_label_t* b_mask_t, uint32_t hapN) { uint32_t i; @@ -13822,7 +14094,7 @@ bub_label_t* b_mask_t, uint32_t hapN) update_poly_trio(1<n_seq); sprintf(fp, "hap%u", i+1); output_trio_unitig_graph(sg, coverage_cut, output_file_name, FATHER, sources, reverse_sources, tipsLen, tip_drop_ratio, - stops_threshold, ruIndex, chimeric_rate, drop_ratio, max_hang, min_ovlp, is_bench, b_mask_t, fp, NULL, NULL); + stops_threshold, ruIndex, chimeric_rate, drop_ratio, max_hang, min_ovlp, gap_fuzz, is_bench, b_mask_t, fp, NULL, NULL); } free(fp); free(hapS); } @@ -15214,11 +15486,10 @@ void purge_ovlp_cov_adv(uint32_t id, kv_u_trans_t *ta, asg64_v *b64, ug_rid_cov_ } } -void trans_sec_cut_filter_mmhap_adv(kv_u_trans_t *ta, ma_ug_t *ug, asg_t *sg, ma_hit_t_alloc* src) +void trans_sec_cut_filter_mmhap_adv(kv_u_trans_t *ta, ma_ug_t *ug, asg_t *sg, ma_hit_t_alloc* src, ug_rid_cov_t *in) { - uint64_t k; - asg64_v b64; kv_init(b64); - ug_rid_cov_t *cc = gen_ug_rid_cov_t(ug, sg, src); + uint64_t k; asg64_v b64; kv_init(b64); + ug_rid_cov_t *cc = ((in)?(in):(gen_ug_rid_cov_t(ug, sg, src))); fprintf(stderr, "+[M::%s]\thom_cov::%lu\thet_cov::%lu\thom_cut::%lu\n", __func__, cc->hom_cov, cc->het_cov, cc->hom_max); @@ -15227,7 +15498,7 @@ void trans_sec_cut_filter_mmhap_adv(kv_u_trans_t *ta, ma_ug_t *ug, asg_t *sg, ma purge_ovlp_cov_adv(k, ta, &b64, cc, cc->hom_max); } - destory_ug_rid_cov_t(cc); free(cc); kv_destroy(b64); + if(!in) {destory_ug_rid_cov_t(cc); free(cc);} kv_destroy(b64); } static void worker_for_trans_sec_simple_cut(void *data, long i, int tid) // callback for kt_for() @@ -15257,8 +15528,18 @@ static void worker_for_trans_sec_simple_cut(void *data, long i, int tid) // call } } +void gen_ug_rid_cov_t_by_ovlp(kv_u_trans_t *ta, ug_rid_cov_t *cc) +{ + uint64_t 
z, k, id, n; u_trans_t *a; + for (z = 0; z < cc->ug->g->n_seq; z++) { + id = z; a = u_trans_a(*ta, id); n = u_trans_n(*ta, id); + for (k = 0; k < n; k++) { + append_cov_line_ug_rid_cov_t(id, cc->cov.a+cc->idx[id], &(a[k]), cc, ((uint64_t)-1), -1); + } + } +} -void clean_u_trans_t_idx_filter_mmhap_adv(kv_u_trans_t *ta, ma_ug_t *ug, asg_t *read_g, ma_hit_t_alloc* src) +void clean_u_trans_t_idx_filter_mmhap_adv(kv_u_trans_t *ta, ma_ug_t *ug, asg_t *read_g, ma_hit_t_alloc* src, ug_rid_cov_t *in) { u_trans_clean_t sl; uint64_t k, i, l, st, occ; ha_mzl_t *tz; ha_mzl_v srt_a; kv_u_trans_t *bl; u_trans_t *z; @@ -15324,7 +15605,7 @@ void clean_u_trans_t_idx_filter_mmhap_adv(kv_u_trans_t *ta, ma_ug_t *ug, asg_t * free(sl.res); kv_destroy(srt_a); kt_u_trans_t_idx(ta, ug->g->n_seq); // dbg_prt_utg_trans(ta, ug, "after"); - trans_sec_cut_filter_mmhap_adv(ta, ug, read_g, src); + trans_sec_cut_filter_mmhap_adv(ta, ug, read_g, src, in); kt_for(sl.n_thread, worker_for_trans_sec_simple_cut, &sl, sl.ta->idx.n); // CALLOC(sl.srt, sl.n_thread); sl.sec_rate = 0.5; @@ -15344,6 +15625,7 @@ void clean_u_trans_t_idx_filter_mmhap_adv(kv_u_trans_t *ta, ma_ug_t *ug, asg_t * st = i; } } + // dbg_prt_utg_extra_trans(ta, ug, asm_opt.output_file_name); } @@ -15471,13 +15753,16 @@ long long gap_fuzz, bub_label_t* b_mask_t, ug_opt_t *opt) kv_destroy(d_edges.a); asg_cleanup(sg); - // reduce_hamming_error(sg, sources, coverage_cut, max_hang, min_ovlp, gap_fuzz); - reduce_hamming_error_adv(NULL, sg, sources, coverage_cut, max_hang, min_ovlp, gap_fuzz, opt->ruIndex, NULL); + // reduce_hamming_error_adv(NULL, sg, sources, coverage_cut, max_hang, min_ovlp, gap_fuzz, opt->ruIndex, NULL); - ug_fa = output_trio_unitig_graph(sg, coverage_cut, output_file_name, FATHER, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, - 0.05, 0.9, max_hang, min_ovlp, rhits?1:0, b_mask_t, NULL, NULL, NULL); - ug_mo = output_trio_unitig_graph(sg, coverage_cut, output_file_name, MOTHER, sources, 
reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, - 0.05, 0.9, max_hang, min_ovlp, rhits?1:0, b_mask_t, NULL, NULL, NULL); + // ug_fa = output_trio_unitig_graph(sg, coverage_cut, output_file_name, FATHER, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, + // 0.05, 0.9, max_hang, min_ovlp, gap_fuzz, rhits?1:0, b_mask_t, NULL, NULL, NULL); + // ug_mo = output_trio_unitig_graph(sg, coverage_cut, output_file_name, MOTHER, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, + // 0.05, 0.9, max_hang, min_ovlp, gap_fuzz, rhits?1:0, b_mask_t, NULL, NULL, NULL); + + + output_trio_graph_joint(sg, coverage_cut, output_file_name, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, + 0.05, 0.9, max_hang, min_ovlp, gap_fuzz, b_mask_t, rhits?(&ug_fa):NULL, rhits?(&ug_mo):NULL); if(rhits) { ha_aware_order(rhits, sg, ug_fa, ug_mo, cov?&(cov->t_ch->k_trans):&(t_ch->k_trans), opt, 3); @@ -15612,7 +15897,7 @@ ma_ug_t *mm_ug, mmhap_t *rh, uint32_t n_hap) sprintf(fp, "hap%u", i+1); update_trio_mmhap(i, mm_ug, rh, sg, n_hap); output_trio_unitig_graph(sg, coverage_cut, output_file_name, FATHER, sources, reverse_sources, tipsLen, tip_drop_ratio, - stops_threshold, ruIndex, chimeric_rate, drop_ratio, max_hang, min_ovlp, 0, b_mask_t, fp, NULL, NULL); + stops_threshold, ruIndex, chimeric_rate, drop_ratio, max_hang, min_ovlp, gap_fuzz, 0, b_mask_t, fp, NULL, NULL); } free(fp); } @@ -15666,9 +15951,9 @@ float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, long long gap // if((asm_opt.flag & HA_F_VERBOSE_GFA)) write_trans_chain(cov->t_ch, output_file_name); } - dbg_prt_utg_trans(&(cov?cov->t_ch->k_trans:t_ch->k_trans), ug, "pre"); - clean_u_trans_t_idx_filter_mmhap_adv(&(cov?cov->t_ch->k_trans:t_ch->k_trans), ug, sg, opt->sources); - dbg_prt_utg_trans(&(cov?cov->t_ch->k_trans:t_ch->k_trans), ug, "after"); + // dbg_prt_utg_trans(&(cov?cov->t_ch->k_trans:t_ch->k_trans), ug, "pre"); + 
clean_u_trans_t_idx_filter_mmhap_adv(&(cov?cov->t_ch->k_trans:t_ch->k_trans), ug, sg, opt->sources, NULL); + // dbg_prt_utg_trans(&(cov?cov->t_ch->k_trans:t_ch->k_trans), ug, "after"); // refine_hic_trans_mmhap(opt, &(cov?cov->t_ch->k_trans:t_ch->k_trans), sg, ug); ///for debug @@ -15729,7 +16014,7 @@ float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, long long gap kv_destroy(d_edges.a); asg_cleanup(sg); - dbg_prt_trio_mmhap_label(ug, rh, output_file_name); + // dbg_prt_trio_mmhap_label(ug, rh, output_file_name); output_trio_mmhap(sg, coverage_cut, output_file_name, sources, reverse_sources, tipsLen, tip_drop_ratio, stops_threshold, ruIndex, chimeric_rate, drop_ratio, max_hang, min_ovlp, gap_fuzz, b_mask_t, opt, ug, rh, asm_opt.polyploidy); @@ -15851,9 +16136,9 @@ long long gap_fuzz, bub_label_t* b_mask_t) reduce_hamming_error_adv(NULL, sg, sources, coverage_cut, max_hang, min_ovlp, gap_fuzz, opt.ruIndex, NULL); output_trio_unitig_graph(sg, coverage_cut, output_file_name, FATHER, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, - 0.05, 0.9, max_hang, min_ovlp, 0, b_mask_t, NULL, NULL, NULL); + 0.05, 0.9, max_hang, min_ovlp, gap_fuzz, 0, b_mask_t, NULL, NULL, NULL); output_trio_unitig_graph(sg, coverage_cut, output_file_name, MOTHER, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, - 0.05, 0.9, max_hang, min_ovlp, 0, b_mask_t, NULL, NULL, NULL); + 0.05, 0.9, max_hang, min_ovlp, gap_fuzz, 0, b_mask_t, NULL, NULL, NULL); } void set_trio_flag_by_cov(ma_ug_t *ug, asg_t *read_g, hap_cov_t *cov) @@ -16616,7 +16901,7 @@ void output_bp_graph_adv(asg_t *sg, ma_sub_t* coverage_cut, char* output_file_na ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, long long tipsLen, float tip_drop_ratio, long long stops_threshold, R_to_U* ruIndex, float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, -bub_label_t* b_mask_t, ug_opt_t *opt) +int gap_fuzz, bub_label_t* b_mask_t, ug_opt_t *opt) { hic_clean(sg); 
@@ -16696,16 +16981,15 @@ bub_label_t* b_mask_t, ug_opt_t *opt) output_trio_unitig_graph(sg, coverage_cut, output_file_name, FATHER, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, - 0.05, 0.9, max_hang, min_ovlp, 0, b_mask_t, NULL, NULL, NULL); + 0.05, 0.9, max_hang, min_ovlp, gap_fuzz, 0, b_mask_t, NULL, NULL, NULL); output_trio_unitig_graph(sg, coverage_cut, output_file_name, MOTHER, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, - 0.05, 0.9, max_hang, min_ovlp, 0, b_mask_t, NULL, NULL, NULL); + 0.05, 0.9, max_hang, min_ovlp, gap_fuzz, 0, b_mask_t, NULL, NULL, NULL); } void output_bp_graph(asg_t *sg, ma_sub_t* coverage_cut, char* output_file_name, -ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, -long long tipsLen, float tip_drop_ratio, long long stops_threshold, -R_to_U* ruIndex, float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, -bub_label_t* b_mask_t) +ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, long long tipsLen, float tip_drop_ratio, long long stops_threshold, +R_to_U* ruIndex, float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, bub_label_t* b_mask_t, +long long gap_fuzz, ug_opt_t *opt) { hic_clean(sg); kvec_asg_arc_t_warp new_rtg_edges; @@ -16743,10 +17027,15 @@ bub_label_t* b_mask_t) ma_ug_destroy(ug); kv_destroy(new_rtg_edges.a); - output_trio_unitig_graph(sg, coverage_cut, output_file_name, FATHER, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, - 0.05, 0.9, max_hang, min_ovlp, 0, b_mask_t, NULL, NULL, NULL); - output_trio_unitig_graph(sg, coverage_cut, output_file_name, MOTHER, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, - 0.05, 0.9, max_hang, min_ovlp, 0, b_mask_t, NULL, NULL, NULL); + // reduce_hamming_error_adv(NULL, sg, sources, coverage_cut, max_hang, min_ovlp, gap_fuzz, opt->ruIndex, NULL); + + // output_trio_unitig_graph(sg, coverage_cut, output_file_name, FATHER, sources, reverse_sources, 
(asm_opt.max_short_tip*2), 0.15, 3, ruIndex, + // 0.05, 0.9, max_hang, min_ovlp, gap_fuzz, 0, b_mask_t, NULL, NULL, NULL); + // output_trio_unitig_graph(sg, coverage_cut, output_file_name, MOTHER, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, + // 0.05, 0.9, max_hang, min_ovlp, gap_fuzz, 0, b_mask_t, NULL, NULL, NULL); + + output_trio_graph_joint(sg, coverage_cut, output_file_name, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, + 0.05, 0.9, max_hang, min_ovlp, gap_fuzz, b_mask_t, NULL, NULL); } ma_ug_t* merge_utg(ma_ug_t **dest, ma_ug_t **src) @@ -16809,15 +17098,15 @@ ma_ug_t* merge_utg(ma_ug_t **dest, ma_ug_t **src) void benchmark_hic_graph(asg_t *sg, ma_sub_t* coverage_cut, char* output_file_name, ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, long long tipsLen, float tip_drop_ratio, long long stops_threshold, R_to_U* ruIndex, -float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, bub_label_t* b_mask_t) +float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, int gap_fuzz, bub_label_t* b_mask_t) { ma_ug_t *ug_1 = output_trio_unitig_graph(sg, coverage_cut, output_file_name, FATHER, sources, reverse_sources, tipsLen, tip_drop_ratio, stops_threshold, ruIndex, - chimeric_rate, drop_ratio, max_hang, min_ovlp, 1, b_mask_t, NULL, NULL, NULL); + chimeric_rate, drop_ratio, max_hang, min_ovlp, gap_fuzz, 1, b_mask_t, NULL, NULL, NULL); ma_ug_t *ug_2 = output_trio_unitig_graph(sg, coverage_cut, output_file_name, MOTHER, sources, reverse_sources, tipsLen, tip_drop_ratio, stops_threshold, ruIndex, - chimeric_rate, drop_ratio, max_hang, min_ovlp, 1, b_mask_t, NULL, NULL, NULL); + chimeric_rate, drop_ratio, max_hang, min_ovlp, gap_fuzz, 1, b_mask_t, NULL, NULL, NULL); fprintf(stderr, "ug_1->u.n: %u, ug_2->u.n: %u\n", (uint32_t)ug_1->u.n, (uint32_t)ug_2->u.n); ma_ug_t *ug = merge_utg(&ug_1, &ug_2); fprintf(stderr, "ug->u.n: %u\n", (uint32_t)ug->u.n); @@ -18593,55 +18882,287 @@ asg_t* 
copy_read_graph(asg_t *src) return dest; } +rd_hamming_fly_t* gen_rd_hamming_fly_t(ma_ug_t *ug, asg_t *sg) +{ + rd_hamming_fly_t *p; CALLOC(p, 1); + MALLOC(p->o2n, sg->n_seq); + memset(p->o2n, -1, sizeof((*(p->o2n)))*sg->n_seq); + MALLOC(p->ugh, ug->g->n_seq); + return p; +} + +void destroy_rd_hamming_fly_t(rd_hamming_fly_t *p) +{ + free(p->o2n); free(p->srt->a); free(p->srt); + ma_ug_destroy(p->nug); asg_destroy(p->nsg); + asg_destroy(p->ref); free(p->ugh); +} + +void recall_arcs(asg_t *des, asg_t *src) +{ + uint32_t v, w, n_vtx = src->n_seq*2; + asg_arc_t *av, *za, *p; uint32_t an, zn, ai, zi, k; + kvec_t(asg_arc_t) ka; kv_init(ka); + + for (v = 0; v < n_vtx; ++v) { + if(src->seq[v>>1].del) continue; + za = asg_arc_a(src, v); zn = asg_arc_n(src, v); + av = asg_arc_a(des, v); an = asg_arc_n(des, v); + for (zi = 0; zi < zn; zi++) { + if(za[zi].del) continue; + w = za[zi].v; + for (ai = 0; ai < an; ai++) { + if(av[ai].del) continue; + if(av[ai].v == w) break; + } + if(ai >= an) kv_push(asg_arc_t, ka, za[zi]); + } + } + + if(ka.n) { + for (k = 0; k < ka.n; k++) { + p = asg_arc_pushp(des); *p = (ka.a[k]); + } + free(des->idx); + des->idx = 0; + des->is_srt = 0; + asg_cleanup(des); + // asg_symm(des); + } + fprintf(stderr, "[M::%s] # transitive arcs::%u\n", __func__, (uint32_t)ka.n); + fprintf(stderr, "[M::%s] # new arcs::%u, # old arcs::%u\n", __func__, des->n_arc, src->n_arc); + + kv_destroy(ka); +} + +ma_ug_t* gen_fg(ma_ug_t *ug, asg_t *rg, ma_hit_t_alloc* src, ma_sub_t *cov, int32_t max_hang, int32_t min_ovlp, int32_t gap_fuzz) +{ + uint32_t *idx; MALLOC(idx, rg->n_seq); + memset(idx, -1, sizeof((*idx))*rg->n_seq); + ma_ug_t *fg = copy_untig_graph(ug); asg_cleanup(fg->g);///some edges might be deleted + kvec_t(uint64_t) srt; kv_init(srt); + uint64_t i, k, l, m, rv, rw, uv, uw, zn, z, nist = 0; ma_utg_t *u; + for (k = 0; k < fg->u.n; k++) { + u = &(ug->u.a[k]); fg->g->seq[k].c = PRIMARY_LABLE; + if(u->circ) continue; + m = k<<1; m |= (((uint64_t)u->start)<<32); 
kv_push(uint64_t, srt, m); + m = (k<<1)+1; m |= (((uint64_t)u->end)<<32); kv_push(uint64_t, srt, m); + } + + radix_sort_arch64(srt.a, srt.a+srt.n); + for (k = 1, l = 0; k <= srt.n; k++) { + if(k == srt.n || (srt.a[k]>>33) != (srt.a[l]>>33)) { + idx[srt.a[l]>>33] = l; + l = k; + } + } + + ma_hit_t_alloc* x; asg_arc_t *za; + ma_hit_t *h; ma_sub_t *sq, *st; + int32_t r; asg_arc_t t0, t1, *p; + for (k = 0; k < fg->u.n; k++) { + u = &(ug->u.a[k]); + if(u->circ) continue; + + uv = k<<1; rv = u->end^1; + x = &(src[rv>>1]); + za = asg_arc_a(ug->g, uv); + zn = asg_arc_n(ug->g, uv); + for (i = 0; i < x->length; i++) { + h = &(x->buffer[i]); + // if(!(h->el)) continue; + sq = &(cov[Get_qn(*h)]); st = &(cov[Get_tn(*h)]); + if(st->del || rg->seq[Get_tn(*h)].del) continue; + r = ma_hit2arc(h, sq->e - sq->s, st->e - st->s, max_hang, + asm_opt.max_hang_rate, min_ovlp, &t0); + + ///if it is a contained read, skip + if(r < 0) continue; + if((t0.ul>>32) != rv) continue; + rw = t0.v; + if(idx[rw>>1] == ((uint32_t)-1)) continue; + m = idx[rw>>1]; assert((srt.a[m]>>33) == (rw>>1)); + for (; m < srt.n && (srt.a[m]>>33) == (rw>>1); m++) { + if(rw == (srt.a[m]>>32)) { + uw = (uint32_t)srt.a[m]; + if(uv == uw) continue; + for (z = 0; z < zn; z++) { + if((!za[z].del) && (za[z].v==uw)) break; + } + if(z < zn) continue; + if(get_edge_from_source(src, cov, NULL, max_hang, min_ovlp, (t0.v^1), ((t0.ul>>32)^1), &t1)) { + p = asg_arc_pushp(fg->g); *p = t0; + p->ul<<=32; p->ul>>=32; p->ul |= (uv<<32); p->v = uw; + + p = asg_arc_pushp(fg->g); *p = t1; + p->ul<<=32; p->ul>>=32; p->ul |= ((uw^1)<<32); p->v = uv^1; + nist++; + } + } + } + } + + + uv = (k<<1)+1; rv = u->start^1; + x = &(src[rv>>1]); + za = asg_arc_a(ug->g, uv); + zn = asg_arc_n(ug->g, uv); + for (i = 0; i < x->length; i++) { + h = &(x->buffer[i]); + // if(!(h->el)) continue; + sq = &(cov[Get_qn(*h)]); st = &(cov[Get_tn(*h)]); + if(st->del || rg->seq[Get_tn(*h)].del) continue; + r = ma_hit2arc(h, sq->e - sq->s, st->e - st->s, max_hang, + 
asm_opt.max_hang_rate, min_ovlp, &t0); + + ///if it is a contained read, skip + if(r < 0) continue; + if((t0.ul>>32) != rv) continue; + rw = t0.v; + if(idx[rw>>1] == ((uint32_t)-1)) continue; + m = idx[rw>>1]; assert((srt.a[m]>>33) == (rw>>1)); + for (; m < srt.n && (srt.a[m]>>33) == (rw>>1); m++) { + if(rw == (srt.a[m]>>32)) { + uw = (uint32_t)srt.a[m]; + if(uv == uw) continue; + for (z = 0; z < zn; z++) { + if((!za[z].del) && (za[z].v==uw)) break; + } + if(z < zn) continue; + if(get_edge_from_source(src, cov, NULL, max_hang, min_ovlp, (t0.v^1), ((t0.ul>>32)^1), &t1)) { + p = asg_arc_pushp(fg->g); *p = t0; + p->ul<<=32; p->ul>>=32; p->ul |= (uv<<32); p->v = uw; + + p = asg_arc_pushp(fg->g); *p = t1; + p->ul<<=32; p->ul>>=32; p->ul |= ((uw^1)<<32); p->v = uv^1; + nist++; + } + } + } + } + } + + if(nist) { + free(fg->g->idx); + fg->g->idx = 0; + fg->g->is_srt = 0; + asg_cleanup(fg->g); + asg_symm(fg->g); + asg_arc_del_trans(fg->g, gap_fuzz); + ///some of old edges might be lost due the transitive reduction + recall_arcs(fg->g, ug->g); + } + + kv_destroy(srt); free(idx); + return fg; +} + +rd_hamming_fly_simp_t* gen_rd_hamming_fly_simp_t(ma_ug_t *ug, asg_t *rg, ma_hit_t_alloc* src, ma_sub_t *cov, int32_t max_hang, int32_t min_ovlp, int32_t gap_fuzz, kvec_asg_arc_t_warp *ae) +{ + rd_hamming_fly_simp_t *p; CALLOC(p, 1); + // p->ng = asg_init(); + // p->ng->n_seq = p->ng->m_seq = ug->g->n_seq; + // MALLOC(p->ng->seq, p->ng->n_seq); + // memcpy(p->ng->seq, ug->g->seq, (sizeof((*(p->ng->seq)))*p->ng->n_seq)); + p->src = src; p->cov = cov; p->max_hang = max_hang; p->min_ovlp = min_ovlp; p->gap_fuzz = gap_fuzz; + p->fg = gen_fg(ug, rg, src, cov, max_hang, min_ovlp, gap_fuzz); p->n_insert = 0; + CALLOC(p->vs, (ug->g->n_seq<<1)); CALLOC(p->srt, 1); p->ae = ae; + // p->fg = gen_fg(); + // p->rg = rg; MALLOC(p->rs, rg->n_seq<<1); + // memset(p->rs, -1, sizeof((*(p->rs)))*(rg->n_seq<<1)); + return p; +} + +void destroy_rd_hamming_fly_simp_t(rd_hamming_fly_simp_t *p) +{ + 
///asg_destroy(p->ng); ///free(p->rs); + ma_ug_destroy(p->fg); + free(p->vs); + free(p->srt->a); + free(p->srt); +} + void clean_trio_untig_graph(ma_ug_t *ug, asg_t *read_g, ma_sub_t* coverage_cut, ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, long long tipsLen, float tip_drop_ratio, long long stops_threshold, R_to_U* ruIndex, buf_t* b_0, uint8_t* visit, float density, uint32_t miniHapLen, uint32_t miniBiGraph, float chimeric_rate, int is_final_clean, int just_bubble_pop, -float drop_ratio, uint32_t trio_flag, float trio_drop_rate, hap_cov_t *cov) +float drop_ratio, uint32_t trio_flag, float trio_drop_rate, int max_hang, int min_ovlp, +int gap_fuzz, hap_cov_t *cov, kvec_asg_arc_t_warp *ae) { - asg_t *g = ug->g; + asg_t *g = ug->g; rd_hamming_fly_simp_t *p = NULL; uint32_t is_first = 1; - + // if(trio_flag == MOTHER) { + // print_debug_gfa(read_g, ug, coverage_cut, "debug_dups", sources, ruIndex, asm_opt.max_hang_Len, asm_opt.min_overlap_Len, 0, 1, 0); + // exit(1); + // } redo: - ///print_untig((ug), 61955, "i-0:", 0); - + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-0:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-0:", 0); + ///debug + // if(!p) p = gen_rd_hamming_fly_simp_t(ug, read_g, sources, coverage_cut, max_hang, min_ovlp, gap_fuzz); // fprintf(stderr, "[M::%s] 0\n", __func__); - asg_pop_bubble_primary_trio(ug, NULL, trio_flag, DROP, cov, NULL, 1); + asg_pop_bubble_primary_trio(ug, NULL, trio_flag, DROP, cov, NULL, 1, p); + ///do not need to refine bubbles during the first round of cleaning + if(!p) p = gen_rd_hamming_fly_simp_t(ug, read_g, sources, coverage_cut, max_hang, min_ovlp, gap_fuzz, ae); + + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-1:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-1:", 0); // fprintf(stderr, "[M::%s] 1\n", __func__); magic_trio_phasing(g, ug, read_g, coverage_cut, sources, reverse_sources, 2, ruIndex, trio_flag, trio_drop_rate); // fprintf(stderr, "[M::%s] 2\n", __func__); + + 
// if(trio_flag == MOTHER) print_untig((ug), 9, "i-2:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-2:", 0); /**********debug**********/ if(just_bubble_pop == 0) { cut_trio_tip_primary(g, ug, tipsLen, trio_flag, 0, read_g, reverse_sources, ruIndex, cov->is_r_het, 2); } // fprintf(stderr, "[M::%s] 3\n", __func__); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-3:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-3:", 0); /**********debug**********/ long long pre_cons = get_graph_statistic(g); long long cur_cons = 0; while(pre_cons != cur_cons) { // fprintf(stderr, "[M::%s] 4\n", __func__); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-4:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-4:", 0); pre_cons = get_graph_statistic(g); // fprintf(stderr, "[M::%s] 5\n", __func__); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-5:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-5:", 0); ///need consider tangles - asg_pop_bubble_primary_trio(ug, NULL, trio_flag, DROP, cov, NULL, 1); + asg_pop_bubble_primary_trio(ug, NULL, trio_flag, DROP, cov, NULL, 1, p); // fprintf(stderr, "[M::%s] 6\n", __func__); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-6:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-6:", 0); /**********debug**********/ if(just_bubble_pop == 0) { ///need consider tangles asg_arc_cut_trio_long_tip_primary(g, ug, read_g, reverse_sources, ruIndex, 2, tip_drop_ratio, trio_flag, cov, NULL); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-7:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-7:", 0); // fprintf(stderr, "[M::%s] 7\n", __func__); // if(trio_flag == MOTHER) print_debug_gfa(read_g, ug, coverage_cut, "debug_dups", sources, ruIndex, asm_opt.max_hang_Len, asm_opt.min_overlap_Len); asg_arc_cut_trio_long_equal_tips_assembly(g, ug, read_g, reverse_sources, 2, ruIndex, trio_flag, cov, NULL); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-8:", 0); + // 
if(trio_flag == MOTHER) print_untig((ug), 10, "i-8:", 0); // fprintf(stderr, "[M::%s] 8\n", __func__); asg_arc_cut_trio_long_tip_primary_complex(g, ug, read_g, reverse_sources, ruIndex, 2, tip_drop_ratio, stops_threshold, cov, NULL, trio_flag); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-9:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-9:", 0); // fprintf(stderr, "[M::%s] 9\n", __func__); asg_arc_cut_trio_long_equal_tips_assembly_complex(g, ug, read_g, reverse_sources, 2, ruIndex, stops_threshold, cov, NULL, trio_flag); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-10:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-10:", 0); // fprintf(stderr, "[M::%s] 10\n", __func__); detect_chimeric_by_topo(g, ug, read_g, reverse_sources, 2, stops_threshold, chimeric_rate, ruIndex, NULL, cov->is_r_het); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-11:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-11:", 0); // fprintf(stderr, "[M::%s] 11\n", __func__); ///need consider tangles ///note we need both the read graph and the untig graph @@ -18649,40 +19170,60 @@ float drop_ratio, uint32_t trio_flag, float trio_drop_rate, hap_cov_t *cov) /**********debug**********/ cur_cons = get_graph_statistic(g); // fprintf(stderr, "[M::%s] 12\n", __func__); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-12:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-12:", 0); } if(just_bubble_pop == 0) { // fprintf(stderr, "[M::%s] 13\n", __func__); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-13:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-13:", 0); cut_trio_tip_primary(g, ug, tipsLen, trio_flag, 0, read_g, reverse_sources, ruIndex, cov->is_r_het, 2); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-14:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-14:", 0); // fprintf(stderr, "[M::%s] 14\n", __func__); } // print_debug_gfa(read_g, ug, coverage_cut, "debug_dups", sources, ruIndex, 
asm_opt.max_hang_Len, asm_opt.min_overlap_Len); // fprintf(stderr, "[M::%s] 15\n", __func__); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-15:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-15:", 0); magic_trio_phasing(g, ug, read_g, coverage_cut, sources, reverse_sources, 2, ruIndex, trio_flag, trio_drop_rate); // fprintf(stderr, "[M::%s] 16\n", __func__); - + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-16:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-16:", 0); // print_debug_gfa(read_g, ug, coverage_cut, "resolve_tangles", sources, ruIndex, asm_opt.max_hang_Len, asm_opt.min_overlap_Len, 0, 0, 0); // exit(1); ///bug here resolve_tangles(ug, read_g, reverse_sources, 20, 100, 0.05, 0.2, ruIndex, cov->is_r_het, trio_flag, drop_ratio); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-17:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-17:", 0); // fprintf(stderr, "[M::%s] 17\n", __func__); drop_semi_circle(ug, g, read_g, reverse_sources, ruIndex, cov->is_r_het); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-18:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-18:", 0); // fprintf(stderr, "[M::%s] 18\n", __func__); all_to_all_deduplicate(ug, read_g, coverage_cut, sources, trio_flag, trio_drop_rate, reverse_sources, ruIndex, cov->is_r_het, DOUBLE_CHECK_THRES, asm_opt.trio_flag_occ_thres); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-19:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-19:", 0); // fprintf(stderr, "[M::%s] 19\n", __func__); // if(trio_flag == MOTHER) print_untig_by_read(ug, "m54329U_190827_173812/30214441/ccs", (uint32_t)-1, NULL, NULL, "bf-16"); if(is_first) { is_first = 0; unitig_arc_del_short_diploid_by_length(ug->g, drop_ratio); + // if(trio_flag == MOTHER) print_untig((ug), 9, "i-20:", 0); + // if(trio_flag == MOTHER) print_untig((ug), 10, "i-20:", 0); // fprintf(stderr, "[M::%s] 20\n", __func__); goto redo; - } + } + if(p) { + fprintf(stderr, "[M::%s] # 
adjusted arcs::%u\n", __func__, p->n_insert); + destroy_rd_hamming_fly_simp_t(p); free(p); + } } - void print_graph_statistic(asg_t *g, const char* cmd) { uint64_t n_arc = 0, n_node = 0, size = 0; @@ -18714,7 +19255,7 @@ int just_bubble_pop, float drop_ratio, hap_cov_t *cov) redo: ///print_graph_statistic(g, "beg"); ///print_debug_gfa(read_g, ug, coverage_cut, "debug_trans_ovlp_hg002", sources, ruIndex, asm_opt.max_hang_Len, asm_opt.min_overlap_Len); - asg_pop_bubble_primary_trio(ug, NULL, (uint32_t)-1, DROP, cov, NULL, 1); + asg_pop_bubble_primary_trio(ug, NULL, (uint32_t)-1, DROP, cov, NULL, 1, NULL); if(just_bubble_pop == 0) { cut_trio_tip_primary(g, ug, tipsLen, (uint32_t)-1, 0, read_g, reverse_sources, ruIndex, cov->is_r_het, 2); @@ -18725,7 +19266,7 @@ int just_bubble_pop, float drop_ratio, hap_cov_t *cov) while(pre_cons != cur_cons) { pre_cons = get_graph_statistic(g); - asg_pop_bubble_primary_trio(ug, NULL, (uint32_t)-1, DROP, cov, NULL, 1); + asg_pop_bubble_primary_trio(ug, NULL, (uint32_t)-1, DROP, cov, NULL, 1, NULL); if(just_bubble_pop == 0) { ///need consider tangles @@ -18777,7 +19318,7 @@ int min_ovlp, hap_cov_t *cov) // print_debug_gfa(read_g, ug, coverage_cut, "debug_init", sources, ruIndex, asm_opt.max_hang_Len, asm_opt.min_overlap_Len); redo: - asg_pop_bubble_primary_trio(ug, NULL, (uint32_t)-1, DROP, cov, o, 1); + asg_pop_bubble_primary_trio(ug, NULL, (uint32_t)-1, DROP, cov, o, 1, NULL); cut_trio_tip_primary(g, ug, tipsLen, (uint32_t)-1, 0, read_g, reverse_sources, ruIndex, cov->is_r_het, 2); long long pre_cons = get_graph_statistic(g); @@ -18789,7 +19330,7 @@ int min_ovlp, hap_cov_t *cov) while(pre_cons != cur_cons) { pre_cons = get_graph_statistic(g); - asg_pop_bubble_primary_trio(ug, NULL, (uint32_t)-1, DROP, cov, o, 1); + asg_pop_bubble_primary_trio(ug, NULL, (uint32_t)-1, DROP, cov, o, 1, NULL); ///need consider tangles asg_arc_cut_trio_long_tip_primary(g, ug, read_g, reverse_sources, ruIndex, 2, tip_drop_ratio, (uint32_t)-1, cov, o); @@ 
-19338,16 +19879,105 @@ void delete_useless_nodes(ma_ug_t **ug) asg_cleanup(nsg); } +inline uint32_t is_useful_node(uint32_t flag_occ, uint32_t non_flag_occ, uint32_t drop_occ, uint32_t tot_occ, +float flag_rate, float used_rate, uint32_t min_occ) +{ + if((flag_occ > 0) && (flag_occ >= min_occ) && (flag_occ >= ((non_flag_occ+flag_occ+drop_occ)*flag_rate)) + && (drop_occ <= (tot_occ*used_rate))) { + return 1; + } + return 0; +} +void recover_chain_nodes(buf_t *in, ma_ug_t *ug, uint32_t flag, float flag_rate, float used_rate, uint32_t min_occ) +{ + ma_utg_t *u = NULL; uint32_t flag_occ, non_flag_occ, drop_occ, rid; + uint32_t tot_flag_occ, tot_non_flag_occ, tot_drop_occ, tot_occ, i, k, z; + tot_flag_occ = tot_non_flag_occ = tot_drop_occ = tot_occ = 0; + + for (i = 0; i < in->b.n; i++) { + u = &(ug->u.a[in->b.a[i]>>1]); + flag_occ = non_flag_occ = drop_occ = 0; + for (k = 0; k < u->n; k++) { + rid = u->a[k]>>33; + if(R_INF.trio_flag[rid] == AMBIGU) continue; + else if(R_INF.trio_flag[rid] == DROP) drop_occ++; + else if(R_INF.trio_flag[rid] == flag) flag_occ++; + else if(R_INF.trio_flag[rid] != flag) non_flag_occ++; + } + + if(is_useful_node(flag_occ, non_flag_occ, drop_occ, u->n, flag_rate, used_rate, min_occ)) { + ug->g->seq[in->b.a[i]>>1].c = PRIMARY_LABLE; + } + tot_flag_occ += flag_occ; + tot_non_flag_occ += non_flag_occ; + tot_drop_occ += drop_occ; + tot_occ += u->n; + if(is_useful_node(tot_flag_occ, tot_non_flag_occ, tot_drop_occ, tot_occ, flag_rate, used_rate, min_occ)) { + for (z = 0; z <= i; z++) { + if(ug->g->seq[in->b.a[z]>>1].c == ALTER_LABLE) { + u = &(ug->u.a[in->b.a[z]>>1]); + flag_occ = non_flag_occ = drop_occ = 0; + for (k = 0; k < u->n; k++) { + rid = u->a[k]>>33; + if(R_INF.trio_flag[rid] == AMBIGU) continue; + else if(R_INF.trio_flag[rid] == DROP) drop_occ++; + else if(R_INF.trio_flag[rid] == flag) flag_occ++; + else if(R_INF.trio_flag[rid] != flag) non_flag_occ++; + } + if(is_useful_node(flag_occ, non_flag_occ, drop_occ, u->n, flag_rate, 
used_rate, 0/**min_occ**/)) { + ug->g->seq[in->b.a[z]>>1].c = PRIMARY_LABLE; + } + } + } + } + } +} + +void rescue_useless_trio_nodes(ma_ug_t *ug, uint32_t flag, float flag_rate, float used_rate, uint32_t min_occ) +{ + asg_t* nsg = ug->g; + uint32_t v, w, n_vtx = nsg->n_seq<<1, i, k, z; + long long nodeLen, baseLen, max_stop_nodeLen, max_stop_baseLen; + buf_t b; memset(&b, 0, sizeof(buf_t)); + + for (v = 0; v < n_vtx; v++) { + if(nsg->seq[v>>1].del) continue; + if(nsg->seq[v>>1].c != ALTER_LABLE) continue; + ///check if beg is the tig end + if(get_real_length(nsg, v^1, NULL) == 1) { + get_real_length(nsg, v^1, &w); + if(get_real_length(nsg, w^1, NULL) == 1) continue; + } + + b.b.n = 0; + get_unitig(nsg, ug, v, &w, &nodeLen, &baseLen, &max_stop_nodeLen, &max_stop_baseLen, 1, &b); + recover_chain_nodes(&b, ug, flag, flag_rate, used_rate, min_occ); + if(b.b.n > 1) { + k = b.b.n>>1; + for (i = 0; i < k; i++) { + z = b.b.a[i]; b.b.a[i] = b.b.a[b.b.n-i-1]; b.b.a[b.b.n-i-1] = z; + } + recover_chain_nodes(&b, ug, flag, flag_rate, used_rate, min_occ); + } + } + + free(b.b.a); + asg_cleanup(nsg); +} void delete_useless_trio_nodes(ma_ug_t **ug, asg_t* read_g, ma_sub_t* coverage_cut, -ma_hit_t_alloc* sources, R_to_U* ruIndex) +ma_hit_t_alloc* sources, R_to_U* ruIndex, uint32_t flag, float flag_rate, float used_rate, uint32_t min_occ) { asg_t* nsg = (*ug)->g; uint32_t v, n_vtx = nsg->n_seq; uint8_t* primary_flag = (uint8_t*)calloc(read_g->n_seq, sizeof(uint8_t)); + if(flag_rate > 0 && used_rate > 0 && min_occ > 0) { + rescue_useless_trio_nodes(*ug, flag, flag_rate, used_rate, min_occ); + } + for (v = 0; v < n_vtx; ++v) { if(nsg->seq[v].del) continue; @@ -19588,7 +20218,7 @@ void purge_dump(ma_ug_t* ug) void adjust_utg_by_trio(ma_ug_t **ug, asg_t* read_g, uint8_t flag, float drop_rate, ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, ma_sub_t* coverage_cut, long long tipsLen, float tip_drop_ratio, long long stops_threshold, -R_to_U* ruIndex, float chimeric_rate, float 
drop_ratio, int max_hang, int min_ovlp, +R_to_U* ruIndex, float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, int gap_fuzz, kvec_asg_arc_t_warp* new_rtg_edges, bub_label_t* b_mask_t) { asg_t* nsg = (*ug)->g; @@ -19632,11 +20262,11 @@ kvec_asg_arc_t_warp* new_rtg_edges, bub_label_t* b_mask_t) } // fprintf(stderr, "[M::%s] 3\n", __func__); clean_trio_untig_graph(*ug, read_g, coverage_cut, sources, reverse_sources, tipsLen, - tip_drop_ratio, stops_threshold, ruIndex, NULL, NULL, 0, 0, 0, chimeric_rate, 0, 0, drop_ratio, flag, drop_rate, cov); + tip_drop_ratio, stops_threshold, ruIndex, NULL, NULL, 0, 0, 0, chimeric_rate, 0, 0, drop_ratio, flag, drop_rate, max_hang, min_ovlp, gap_fuzz, cov, new_rtg_edges); // fprintf(stderr, "[M::%s] 4\n", __func__); ///delete_useless_nodes(ug); - delete_useless_trio_nodes(ug, read_g, coverage_cut, sources, ruIndex); + delete_useless_trio_nodes(ug, read_g, coverage_cut, sources, ruIndex, flag, 0.8, 0.15, 16); // fprintf(stderr, "[M::%s] 5\n", __func__); update_hap_label(*ug, read_g); @@ -19684,7 +20314,7 @@ kvec_asg_arc_t_warp* new_rtg_edges, bub_label_t* b_mask_t) // fprintf(stderr, "[M::%s] 16\n", __func__); ///delete_useless_nodes(ug); - delete_useless_trio_nodes(ug, read_g, coverage_cut, sources, ruIndex); + delete_useless_trio_nodes(ug, read_g, coverage_cut, sources, ruIndex, flag, 0.8, 0.15, 16); // fprintf(stderr, "[M::%s] 17\n", __func__); @@ -19694,7 +20324,7 @@ kvec_asg_arc_t_warp* new_rtg_edges, bub_label_t* b_mask_t) asm_opt.purge_simi_thres, asm_opt.purge_overlap_len, max_hang, min_ovlp, drop_ratio, 1, 0, cov, 0, 0); ///delete_useless_nodes(ug); - delete_useless_trio_nodes(ug, read_g, coverage_cut, sources, ruIndex); + delete_useless_trio_nodes(ug, read_g, coverage_cut, sources, ruIndex, flag, 0.8, 0.15, 16); } // fprintf(stderr, "[M::%s] 18\n", __func__); @@ -19724,27 +20354,49 @@ int debug_untig_length(ma_ug_t *g, uint32_t tipsLen, const char* name) return 0; } +void prt_phase_dbg_graph(char *in, asg_t 
*sg, ma_sub_t *cov, ma_hit_t_alloc *src, R_to_U* ri, int max_hang, int min_ovlp) +{ + char* gfa_name = (char*)malloc(strlen(in)+100); + sprintf(gfa_name, "%s.phase", in); + uint64_t pscut = 0; + pscut = (asm_opt.hom_global_coverage_set?(asm_opt.hom_global_coverage):(((double)asm_opt.hom_global_coverage)/((double)HOM_PEAK_RATE))); + pscut *= PHASE_SEF; if(pscut < PHASE_SEP) pscut = PHASE_SEP; + ma_ug_t *ug = ma_ug_gen_phase(sg, pscut, PHASE_SEP_RATE); + print_debug_gfa(sg, ug, cov, gfa_name, src, ri, max_hang, min_ovlp, 0, 0, 0); + ma_ug_destroy(ug); + sprintf(gfa_name, "%s.raw", in); + ug = ma_ug_gen(sg); + print_debug_gfa(sg, ug, cov, gfa_name, src, ri, max_hang, min_ovlp, 0, 0, 0); + ma_ug_destroy(ug); + + free(gfa_name); +} ma_ug_t* output_trio_unitig_graph(asg_t *sg, ma_sub_t* coverage_cut, char* output_file_name, uint8_t flag, ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, long long tipsLen, float tip_drop_ratio, long long stops_threshold, R_to_U* ruIndex, -float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, int is_bench, bub_label_t* b_mask_t, -char *f_prefix, uint8_t *kpt_buf, kvec_asg_arc_t_warp *r_edges) +float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, int gap_fuzz, +int is_bench, bub_label_t* b_mask_t, char *f_prefix, uint8_t *kpt_buf, kvec_asg_arc_t_warp *r_edges) { char* gfa_name = (char*)malloc(strlen(output_file_name)+100); sprintf(gfa_name, "%s.%s.p_ctg.gfa", output_file_name, f_prefix?f_prefix:(flag==FATHER?"hap1":"hap2")); FILE* output_file = NULL; if(is_bench == 0) output_file = fopen(gfa_name, "w"); - ma_ug_t *ug = NULL; - ug = ma_ug_gen(sg); + // prt_phase_dbg_graph(gfa_name, sg, coverage_cut, sources, ruIndex, max_hang, min_ovlp); + + ma_ug_t *ug = NULL; uint64_t pscut = 0; + // ug = ma_ug_gen(sg); + pscut = (asm_opt.hom_global_coverage_set?(asm_opt.hom_global_coverage):(((double)asm_opt.hom_global_coverage)/((double)HOM_PEAK_RATE))); + pscut *= PHASE_SEF; if(pscut < PHASE_SEP) pscut = PHASE_SEP; + 
ug = ma_ug_gen_phase(sg, pscut, PHASE_SEP_RATE); kvec_asg_arc_t_warp new_rtg_edges; kv_init(new_rtg_edges.a); adjust_utg_by_trio(&ug, sg, flag, TRIO_THRES, sources, reverse_sources, coverage_cut, tipsLen, tip_drop_ratio, stops_threshold, ruIndex, chimeric_rate, drop_ratio, max_hang, - min_ovlp, &new_rtg_edges, b_mask_t); + min_ovlp, gap_fuzz, &new_rtg_edges, b_mask_t); if(asm_opt.b_low_cov > 0) { break_ug_contig(&ug, sg, &R_INF, coverage_cut, sources, ruIndex, &new_rtg_edges, max_hang, min_ovlp, @@ -19806,6 +20458,39 @@ char *f_prefix, uint8_t *kpt_buf, kvec_asg_arc_t_warp *r_edges) } +void output_hap_graph(ma_ug_t *ug, asg_t *sg, kvec_asg_arc_t_warp *arcs, +ma_sub_t* coverage_cut, char* output_file_name, uint8_t flag, ma_hit_t_alloc* sources, +R_to_U* ruIndex, int max_hang, int min_ovlp, char *f_prefix) +{ + char* gfa_name = (char*)malloc(strlen(output_file_name)+100); + sprintf(gfa_name, "%s.%s.p_ctg.gfa", output_file_name, f_prefix?f_prefix:(flag==FATHER?"hap1":"hap2")); + FILE* output_file = fopen(gfa_name, "w"); + + fprintf(stderr, "Writing %s to disk... 
\n", gfa_name); + ///debug_utg_graph(ug, sg, 0, 0); + ///debug_untig_length(ug, tipsLen, gfa_name); + ///print_untig_by_read(ug, "m64011_190901_095311/125831121/ccs", 2310925, "end"); + ma_ug_seq(ug, sg, coverage_cut, sources, arcs, max_hang, min_ovlp, 0, 1); + ma_ug_print(ug, sg, coverage_cut, sources, ruIndex, (flag==FATHER?"h1tg":"h2tg"), output_file); + fclose(output_file); + + sprintf(gfa_name, "%s.%s.p_ctg.noseq.gfa", output_file_name, f_prefix?f_prefix:(flag==FATHER?"hap1":"hap2")); + output_file = fopen(gfa_name, "w"); + ma_ug_print_simple(ug, sg, coverage_cut, sources, ruIndex, (flag==FATHER?"h1tg":"h2tg"), output_file); + fclose(output_file); + if(asm_opt.bed_inconsist_rate != 0) + { + sprintf(gfa_name, "%s.%s.p_ctg.lowQ.bed", output_file_name, f_prefix?f_prefix:(flag==FATHER?"hap1":"hap2")); + output_file = fopen(gfa_name, "w"); + ma_ug_print_bed(ug, sg, &R_INF, coverage_cut, sources, arcs, + max_hang, min_ovlp, asm_opt.bed_inconsist_rate, (flag==FATHER?"h1tg":"h2tg"), output_file, NULL); + fclose(output_file); + } + + free(gfa_name); +} + + void filter_set_kug(uint8_t* trio_flag, asg_t *rg, uint8_t *rf, kvec_asg_arc_t_warp *r_edges, float f_rate, ma_ug_t **ug) { asg_t* nsg = (*ug)->g; ma_utg_t *u = NULL; @@ -19857,11 +20542,11 @@ int min_ovlp, int is_bench, long long gap_fuzz, ug_opt_t *opt, bub_label_t* b_ma output_trio_unitig_graph(sg, coverage_cut, output_file_name, FATHER, sources, reverse_sources, tipsLen, tip_drop_ratio, stops_threshold, ruIndex, chimeric_rate, - drop_ratio, max_hang, min_ovlp, is_bench, b_mask_t, NULL, rf, NULL); + drop_ratio, max_hang, min_ovlp, gap_fuzz, is_bench, b_mask_t, NULL, rf, NULL); output_trio_unitig_graph(sg, coverage_cut, output_file_name, MOTHER, sources, reverse_sources, tipsLen, tip_drop_ratio, stops_threshold, ruIndex, chimeric_rate, - drop_ratio, max_hang, min_ovlp, is_bench, b_mask_t, NULL, rf, NULL); + drop_ratio, max_hang, min_ovlp, gap_fuzz, is_bench, b_mask_t, NULL, rf, NULL); if(rf) { kvec_asg_arc_t_warp 
r_edges; kv_init(r_edges.a); @@ -19872,7 +20557,7 @@ int min_ovlp, int is_bench, long long gap_fuzz, ug_opt_t *opt, bub_label_t* b_ma update_dump_trio(R_INF.trio_flag, sg->n_seq, rf, NULL); kug = output_trio_unitig_graph(sg, coverage_cut, output_file_name, FATHER, sources, reverse_sources, tipsLen, tip_drop_ratio, stops_threshold, ruIndex, chimeric_rate, - drop_ratio, max_hang, min_ovlp, 1, b_mask_t, NULL, NULL, &r_edges); + drop_ratio, max_hang, min_ovlp, gap_fuzz, 1, b_mask_t, NULL, NULL, &r_edges); filter_set_kug(R_INF.trio_flag, sg, rf, &r_edges, asm_opt.kpt_rate, &kug); print_utg(kug, sg, coverage_cut, kug_n, sources, ruIndex, max_hang, min_ovlp, &r_edges); @@ -19881,6 +20566,280 @@ int min_ovlp, int is_bench, long long gap_fuzz, ug_opt_t *opt, bub_label_t* b_ma } } +dedup_idx_t *gen_dedup_idx_t(ma_ug_t *ug, asg_t *rg) +{ + // fprintf(stderr, "[M::%s] Start\n", __func__); + dedup_idx_t *p = NULL; CALLOC(p, 1); + p->rg = rg; p->ug = ug; p->ridx_n = rg->n_seq + 1; + uint64_t k, m, l, z, *a, a_n; ma_utg_t *u; + + CALLOC(p->ridx, p->ridx_n); + for (k = p->ra_n = 0; k < ug->u.n; k++) { + u = &(ug->u.a[k]); p->ra_n += u->n; + if(!(u->n)) continue; + for (z = 0; z < u->n; z++) p->ridx[u->a[z]>>33]++; + } + for (k = l = 0; k < p->ridx_n; k++) { + m = p->ridx[k]; p->ridx[k] = l; l += m; + } + + + MALLOC(p->ra, p->ra_n); memset(p->ra, -1, sizeof((*(p->ra)))*p->ra_n); + for (k = 1; k < p->ridx_n; k++) { + a = p->ra + p->ridx[k-1]; + a_n = p->ridx[k] - p->ridx[k-1]; + if(a_n) a[a_n-1] = 0; + } + + for (k = 0; k < ug->u.n; k++) { + u = &(ug->u.a[k]); + if(!(u->n)) continue; + for (z = 0; z < u->n; z++) { + a = p->ra + p->ridx[u->a[z]>>33]; + a_n = p->ridx[(u->a[z]>>33)+1] - p->ridx[u->a[z]>>33]; + if(a_n) { + if(a[a_n-1] == a_n-1) a[a_n-1] = (k<<32)|z; + else a[a[a_n-1]++] = (k<<32)|z; + } + } + } + // fprintf(stderr, "[M::%s] End\n", __func__); + return p; +} + +void destroy_dedup_idx_t(dedup_idx_t *p) +{ + // fprintf(stderr, "[M::%s] Start\n", __func__); + if(p == NULL) 
return; + free(p->ridx); free(p->ra); free(p); + // fprintf(stderr, "[M::%s] End\n", __func__); +} + +void update_recover_atg_cov() +{ + if(asm_opt.recover_atg_cov_min == -1024) { + asm_opt.recover_atg_cov_max = (asm_opt.hom_global_coverage_set? + (asm_opt.hom_global_coverage):(((double)asm_opt.hom_global_coverage)/((double)HOM_PEAK_RATE))); + asm_opt.recover_atg_cov_min = asm_opt.recover_atg_cov_max * 0.85; + asm_opt.recover_atg_cov_max = INT32_MAX; + } +} + +int64_t cal_exact_ug_o(dedup_idx_t *idx, ma_utg_t *u, uint64_t f) +{ + if(u->n <= 0) return INT32_MIN; + uint64_t sv, *sa, sn, ev, *ea, en, si, ei, zn, *za, rev, k, nf = (uint64_t)-1, m, fn, nfn; ma_utg_t *z; + if(f == FATHER) nf = MOTHER; if(f == MOTHER) nf = FATHER; + sv = u->a[0]>>32; sa = idx->ra + idx->ridx[sv>>1]; sn = idx->ridx[(sv>>1)+1]-idx->ridx[sv>>1]; + ev = u->a[u->n-1]>>32; ea = idx->ra + idx->ridx[ev>>1]; en = idx->ridx[(ev>>1)+1] - idx->ridx[ev>>1]; + for (si = 0; si < sn; si++) { + z = &(idx->ug->u.a[sa[si]>>32]); + if((z->n == 0) || (z->m == 0) || (idx->ug->g->seq[sa[si]>>32].del)) continue; + assert(((z->a[(uint32_t)sa[si]]>>32)>>1) == (sv>>1)); + if((z->a[(uint32_t)sa[si]]>>32) == sv) rev = 0; + else rev = 1; + for (ei = 0; ei < en; ei++) { + if((idx->ug->u.a[ea[ei]>>32].n == 0) || (idx->ug->u.a[ea[ei]>>32].m == 0) || (idx->ug->g->seq[ea[ei]>>32].del)) continue; + assert(((idx->ug->u.a[ea[ei]>>32].a[(uint32_t)ea[ei]]>>32)>>1) == (ev>>1)); + if((sa[si]>>32) != (ea[ei]>>32)) continue;///not the same uid + if(((uint32_t)sa[si]) >= ((uint32_t)ea[ei])) { + zn = ((uint32_t)sa[si]) - ((uint32_t)ea[ei]) + 1; za = z->a + ((uint32_t)ea[ei]); + } else { + zn = ((uint32_t)ea[ei]) - ((uint32_t)sa[si]) + 1; za = z->a + ((uint32_t)sa[si]); + } + if(u->n != zn) continue; + if(!rev) { + for (k = 0; (k < zn) && ((u->a[k]>>32) == ((za[k]>>32))); k++); + if(k < zn) continue; + } else { + for (k = 0; (k < zn) && ((u->a[k]>>32) == (((uint64_t)(za[zn-k-1]>>32))^1)); k++); + if(k < zn) continue; + } + + 
assert(u->n <= z->n); + if(u->n < z->n) { + return -1;///delete u + } else if(u->n == z->n) { + for (m = fn = nfn = 0; m < u->n; m++) { + if(R_INF.trio_flag[u->a[m]>>33] == f) fn++; + if(R_INF.trio_flag[u->a[m]>>33] == nf) nfn++; + } + if(fn < nfn) return -1;///delete u + else return sa[si]>>32;///delete z + } + } + } + return INT32_MIN; +} + +void delete_ug_node(ma_ug_t *ug, uint64_t nid) +{ + asg_seq_del(ug->g, nid); + if(ug->u.a[nid].m!=0) { + ug->u.a[nid].m = ug->u.a[nid].n = 0; + free(ug->u.a[nid].a); ug->u.a[nid].a = NULL; + } +} + +uint64_t dedup_exact_ug(dedup_idx_t *ref, dedup_idx_t *qry, ma_sub_t *cov, ma_hit_t_alloc *src, R_to_U *rui, uint8_t *ff, uint8_t trio_f) +{ + // fprintf(stderr, "[M::%s] Start\n", __func__); + uint64_t k, n_base = 0; int64_t f; + for (k = 0; k < qry->ug->u.n; k++) { + if((qry->ug->u.a[k].n == 0) || (qry->ug->u.a[k].m == 0) || (qry->ug->g->seq[k].del)) continue; + if(if_primary_unitig(&(qry->ug->u.a[k]), qry->rg, cov, src, rui, ff)) continue; + f = cal_exact_ug_o(ref, &(qry->ug->u.a[k]), trio_f); + if(f == INT32_MIN) continue; + if(f == -1) {///delete qry + delete_ug_node(qry->ug, k); n_base += qry->ug->u.a[k].len; + } else if(f >= 0) {///delete ref + delete_ug_node(ref->ug, f); n_base += ref->ug->u.a[f].len; + } + } + // fprintf(stderr, "[M::%s] End\n", __func__); + return n_base; +} + +void push_ma_utg_t(ma_ug_t *ug, ma_utg_t *u) +{ + ma_utg_t *p; + ///graph + asg_seq_set(ug->g, ug->u.n, u->len, 0); ug->g->seq[ug->u.n].c = 0; + + //unitig + kv_pushp(ma_utg_t, ug->u, &p); memset(p, 0, sizeof((*p))); + *p = *u; p->a = NULL; p->s = NULL; + MALLOC(p->a, p->m); memcpy(p->a, u->a, sizeof((*(p->a)))*p->m); + if(u->s) { + MALLOC(p->s, p->len); memcpy(p->s, u->s, sizeof((*(p->s)))*p->len); + } + assert(ug->g->n_seq == ug->u.n); + if(p->n) { + p->circ = 0; p->start = (p->a[0]>>32); p->end = (p->a[p->n-1]>>32)^1; + } +} + +uint64_t append_miss_nid(asg_t *sg, ma_ug_t *hap0, ma_ug_t *hap1, uint8_t *ff, uint64_t len_cut, uint64_t occ_cut) +{ 
+ ma_ug_t *ug = NULL; ma_utg_t *u; uint64_t k, z, pscut, n_set, fn, nfn, hap0n, hap1n, n_base = 0; + kvec_asg_arc_t_warp fe; memset(&fe, 0, sizeof(fe)); + memset(ff, 0, sizeof((*ff))*sg->n_seq); + ug = hap0; + for (k = 0; k < ug->u.n; k++) { + u = &(ug->u.a[k]); + if((u->n == 0) || (u->m == 0) || (ug->g->seq[k].del)) continue; + for (z = 0; z < u->n; z++) ff[u->a[z]>>33] = 1; + } + ug = hap1; + for (k = 0; k < ug->u.n; k++) { + u = &(ug->u.a[k]); + if((u->n == 0) || (u->m == 0) || (ug->g->seq[k].del)) continue; + for (z = 0; z < u->n; z++) ff[u->a[z]>>33] = 1; + } + ug = NULL; pscut = 0; + pscut = (asm_opt.hom_global_coverage_set?(asm_opt.hom_global_coverage):(((double)asm_opt.hom_global_coverage)/((double)HOM_PEAK_RATE))); + pscut *= PHASE_SEF; if(pscut < PHASE_SEP) pscut = PHASE_SEP; + ug = ma_ug_gen_phase(sg, pscut, PHASE_SEP_RATE); + for (k = 0; k < ug->u.n; k++) { + u = &(ug->u.a[k]); + if((u->n == 0) || (u->m == 0) || (ug->g->seq[k].del)) continue; + for (z = n_set = 0; z < u->n; z++) { + if(ff[u->a[z]>>33]) n_set++; + } + if((n_set > 0) && (n_set >= (u->n*0.2))) delete_ug_node(ug, k); + } + renew_utg((&ug), sg, &fe); + + for (k = hap0n = hap1n = 0; k < ug->u.n; k++) { + u = &(ug->u.a[k]); + if((u->n == 0) || (u->m == 0) || (ug->g->seq[k].del)) continue; + if((u->len < len_cut) || (u->n < occ_cut)) continue; + for (z = n_set = 0; z < u->n; z++) { + if(ff[u->a[z]>>33]) n_set++; + } + if((n_set > 0) && (n_set >= (u->n*0.2))) continue; + for (z = fn = nfn = 0; z < u->n; z++) { + if(R_INF.trio_flag[u->a[z]>>33] == FATHER) fn++; + if(R_INF.trio_flag[u->a[z]>>33] == MOTHER) nfn++; + } + if(fn > nfn) { + push_ma_utg_t(hap0, u); hap0n++; n_base += u->len; + } else { + push_ma_utg_t(hap1, u); hap1n++; n_base += u->len; + } + } + ma_ug_destroy(ug); ug = NULL; + if(hap0n) { + ug = hap0; + free(ug->g->idx); ug->g->idx = 0; ug->g->is_srt = 0; + asg_cleanup(ug->g); asg_symm(ug->g); + if(ug->g->seq_vis) { + REALLOC(ug->g->seq_vis, (ug->g->n_seq*2)); + memset(ug->g->seq_vis, 
0, sizeof((*(ug->g->seq_vis)))*(ug->g->n_seq*2)); + } + } + if(hap1n) { + ug = hap1; + free(ug->g->idx); ug->g->idx = 0; ug->g->is_srt = 0; + asg_cleanup(ug->g); asg_symm(ug->g); + if(ug->g->seq_vis) { + REALLOC(ug->g->seq_vis, (ug->g->n_seq*2)); + memset(ug->g->seq_vis, 0, sizeof((*(ug->g->seq_vis)))*(ug->g->n_seq*2)); + } + } + return n_base; +} + +void output_trio_graph_joint(asg_t *sg, ma_sub_t* coverage_cut, char* output_file_name, +ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, long long tipsLen, float tip_drop_ratio, +long long stops_threshold, R_to_U* ruIndex, float chimeric_rate, float drop_ratio, int max_hang, +int min_ovlp, long long gap_fuzz, bub_label_t* b_mask_t, ma_ug_t **rhu0, ma_ug_t **rhu1) +{ + ma_ug_t *hu0 = NULL, *hu1 = NULL; kvec_asg_arc_t_warp arcs0, arcs1; + memset(&arcs0, 0, sizeof(arcs0)); memset(&arcs1, 0, sizeof(arcs1)); + + reduce_hamming_error_adv(NULL, sg, sources, coverage_cut, max_hang, min_ovlp, gap_fuzz, ruIndex, NULL); + + hu0 = output_trio_unitig_graph(sg, coverage_cut, output_file_name, FATHER, sources, + reverse_sources, tipsLen, tip_drop_ratio, stops_threshold, ruIndex, chimeric_rate, + drop_ratio, max_hang, min_ovlp, gap_fuzz, 1, b_mask_t, NULL, NULL, &arcs0); + + hu1 = output_trio_unitig_graph(sg, coverage_cut, output_file_name, MOTHER, sources, + reverse_sources, tipsLen, tip_drop_ratio, stops_threshold, ruIndex, chimeric_rate, + drop_ratio, max_hang, min_ovlp, gap_fuzz, 1, b_mask_t, NULL, NULL, &arcs1); + + dedup_idx_t *hidx0 = NULL, *hidx1 = NULL; uint8_t *ff; CALLOC(ff, sg->n_seq); + hidx0 = gen_dedup_idx_t(hu0, sg); hidx1 = gen_dedup_idx_t(hu1, sg); + update_recover_atg_cov(); + + uint64_t dedup_base = 0, miss_base = 0, s; + s = dedup_exact_ug(hidx1, hidx0, coverage_cut, sources, ruIndex, ff, FATHER); dedup_base += s; + s = dedup_exact_ug(hidx0, hidx1, coverage_cut, sources, ruIndex, ff, MOTHER); dedup_base += s; + destroy_dedup_idx_t(hidx0); destroy_dedup_idx_t(hidx1); + + miss_base = append_miss_nid(sg, 
hu0, hu1, ff, PHASE_MISS_LEN, PHASE_MISS_N); free(ff); + + renew_utg((&hu0), sg, &arcs0); renew_utg((&hu1), sg, &arcs1); + fprintf(stderr, "[M::%s] dedup_base::%lu, miss_base::%lu\n", __func__, dedup_base, miss_base); + + if(!rhu0) { + output_hap_graph(hu0, sg, &arcs0, coverage_cut, output_file_name, FATHER, sources, ruIndex, max_hang, min_ovlp, NULL); + ma_ug_destroy(hu0); + } else { + (*rhu0) = hu0; + } + kv_destroy(arcs0.a); + + + if(!rhu1) { + output_hap_graph(hu1, sg, &arcs1, coverage_cut, output_file_name, MOTHER, sources, ruIndex, max_hang, min_ovlp, NULL); + ma_ug_destroy(hu1); + } else { + ma_ug_destroy(hu1); + } + kv_destroy(arcs1.a); +} + void output_read_graph(asg_t *sg, ma_sub_t* coverage_cut, char* output_file_name, long long n_read) { fprintf(stderr, "Writing read GFA to disk... \n"); @@ -21130,7 +22089,7 @@ int max_hang, int min_ovlp, uint8_t* trio_flag, uint8_t* vis_flag, kv_asg_arc_t* ma_sub_t *sq = NULL; ma_sub_t *st = NULL; int32_t r; - asg_arc_t t; + asg_arc_t t0, t1; for (k_i = 0; k_i < b->b.n; k_i++) @@ -21153,18 +22112,16 @@ int max_hang, int min_ovlp, uint8_t* trio_flag, uint8_t* vis_flag, kv_asg_arc_t* st = &(coverage_cut[Get_tn(*h)]); if(st->del || sg->seq[Get_tn(*h)].del) continue; r = ma_hit2arc(h, sq->e - sq->s, st->e - st->s, max_hang, - asm_opt.max_hang_rate, min_ovlp, &t); + asm_opt.max_hang_rate, min_ovlp, &t0); ///if it is a contained read, skip if(r < 0) continue; - if((t.ul>>32) != v) continue; - if(vis_flag[t.ul>>32] == 0 || vis_flag[t.v] == 0) continue; - kv_push(asg_arc_t, *e, t); - get_edge_from_source(sources, coverage_cut, NULL, max_hang, min_ovlp, - (t.v^1), ((t.ul>>32)^1), &t); - kv_push(asg_arc_t, *e, t); + if((t0.ul>>32) != v) continue; + if(vis_flag[t0.ul>>32] == 0 || vis_flag[t0.v] == 0) continue; + if(get_edge_from_source(sources, coverage_cut, NULL, max_hang, min_ovlp, (t0.v^1), ((t0.ul>>32)^1), &t1)) { + kv_push(asg_arc_t, *e, t0); kv_push(asg_arc_t, *e, t1); + } } - } } } @@ -21210,7 +22167,7 @@ buf_t *b, 
uint64_t tLen, uint64_t vis_f, asg_t *res, asg64_v *sv) ma_hit_t_alloc* x = NULL; ma_hit_t *h; ma_sub_t *sq, *st; - int32_t r; asg_arc_t t, *p; + int32_t r; asg_arc_t t0, t1, *p; for (k_i = 0; k_i < b->b.n; k_i++) { if((b->b.a[k_i]>>1)==(bi>>1) || (b->b.a[k_i]>>1)==(b->S.a[0]>>1)) continue; @@ -21224,22 +22181,22 @@ buf_t *b, uint64_t tLen, uint64_t vis_f, asg_t *res, asg64_v *sv) zn = asg_arc_n(sg, v); for (i = 0; i < x->length; i++) { h = &(x->buffer[i]); - if(!(h->el)) continue; + // if(!(h->el)) continue; sq = &(cov[Get_qn(*h)]); st = &(cov[Get_tn(*h)]); if(st->del || sg->seq[Get_tn(*h)].del) continue; r = ma_hit2arc(h, sq->e - sq->s, st->e - st->s, max_hang, - asm_opt.max_hang_rate, min_ovlp, &t); + asm_opt.max_hang_rate, min_ovlp, &t0); ///if it is a contained read, skip if(r < 0) continue; - if((t.ul>>32) != v) continue; - if((vis_r_flag[t.ul>>32] != vis_f) || (vis_r_flag[t.v] != vis_f)) continue; - for (z = 0; (z < zn) && (za[z].v != t.v); z++); + if((t0.ul>>32) != v) continue; + if((vis_r_flag[t0.ul>>32] != vis_f) || (vis_r_flag[t0.v] != vis_f)) continue; + for (z = 0; (z < zn) && (za[z].v != t0.v); z++); if(z < zn) continue; - - p = asg_arc_pushp(res); *p = t; - get_edge_from_source(src, cov, NULL, max_hang, min_ovlp, (t.v^1), ((t.ul>>32)^1), &t); - p = asg_arc_pushp(res); *p = t; + if(get_edge_from_source(src, cov, NULL, max_hang, min_ovlp, (t0.v^1), ((t0.ul>>32)^1), &t1)) { + p = asg_arc_pushp(res); *p = t0; + p = asg_arc_pushp(res); *p = t1; + } } } @@ -21833,12 +22790,19 @@ uint64_t rd_hm_bub(asg_t *g, asg_t *ref, uint32_t v0, uint64_t max_dist, buf_t * return n_pop; } -uint64_t rd_hm_drop0(asg_t *g, asg_t *ref, uint32_t v, double cutoff) +uint64_t rd_hm_drop0(asg_t *g, asg_t *ref, uint32_t v, double cutoff, uint32_t drop_inexact) { uint32_t nv0, nv1, mol = 0, i0, i1, ncut = 0; asg_arc_t *av0, *av1; nv0 = asg_arc_n(g, v); av0 = asg_arc_a(g, v); nv1 = asg_arc_n(ref, v); av1 = asg_arc_a(ref, v); - if(cutoff < 1) { + if(drop_inexact) { + for (i0 = 0; i0 
< nv0; ++i0) { // loop through v's neighbors + if (av0[i0].del) continue; + if (av0[i0].el) continue; + av0[i0].del = 1; asg_arc_del(g, av0[i0].v^1, (av0[i0].ul>>32)^1, 1); + ncut++; + } + } else if(cutoff < 1) { for (i0 = 0; i0 < nv0; ++i0) { // loop through v's neighbors if (av0[i0].del) continue; if(mol < av0[i0].ol) mol = av0[i0].ol; @@ -21866,7 +22830,7 @@ uint64_t rd_hm_drop0(asg_t *g, asg_t *ref, uint32_t v, double cutoff) return ncut; } -uint64_t rd_hm_drop(asg_t *g, asg_t *ref, uint32_t v0, uint32_t v1, double cutoff, buf_t *b) +uint64_t rd_hm_drop(asg_t *g, asg_t *ref, uint32_t v0, uint32_t v1, double cutoff, uint32_t drop_inexact, buf_t *b) { uint32_t i1, ncut = 0; uint32_t v, w, nv1, i; asg_arc_t *av1; @@ -21890,11 +22854,11 @@ uint64_t rd_hm_drop(asg_t *g, asg_t *ref, uint32_t v0, uint32_t v1, double cutof for (i = 0; i < b->b.n; ++i) { // clear the states of visited vertices v = b->b.a[i]; b->a[b->b.a[i]].s = 0; if(v == v0 || v == v1) continue; - ncut += rd_hm_drop0(g, ref, v, cutoff); - ncut += rd_hm_drop0(g, ref, v^1, cutoff); + ncut += rd_hm_drop0(g, ref, v, cutoff, drop_inexact); + ncut += rd_hm_drop0(g, ref, v^1, cutoff, drop_inexact); } - ncut += rd_hm_drop0(g, ref, v0, cutoff); - ncut += rd_hm_drop0(g, ref, v1^1, cutoff); + ncut += rd_hm_drop0(g, ref, v0, cutoff, drop_inexact); + ncut += rd_hm_drop0(g, ref, v1^1, cutoff, drop_inexact); return ncut; } @@ -21927,8 +22891,17 @@ void rd_hamming_symm(void *data, long i, int tid) // callback for kt_for() return; } + ///drop inexact edges first + cuttoff = -1; + ncut = rd_hm_drop(s->g, s->ref, st, ed^1, cuttoff, 1, b); + p = rd_hm_bub(s->g, s->ref, st, max_dist, b); + if(p) { + assert(b->S.a[0] == (ed^1)); + return; + } + for (cuttoff = step; cuttoff < 1.0; cuttoff += step) { - ncut = rd_hm_drop(s->g, s->ref, st, ed^1, cuttoff, b); + ncut = rd_hm_drop(s->g, s->ref, st, ed^1, cuttoff, 0, b); p = rd_hm_bub(s->g, s->ref, st, max_dist, b); if(p) { assert(b->S.a[0] == (ed^1)); @@ -21936,7 +22909,7 @@ void 
rd_hamming_symm(void *data, long i, int tid) // callback for kt_for() } if(!ncut) break; } - rd_hm_drop(s->g, s->ref, st, ed^1, 1024, b); + rd_hm_drop(s->g, s->ref, st, ed^1, 1024, 0, b); p = rd_hm_bub(s->g, s->ref, st, max_dist, b); if(p) { assert(b->S.a[0] == (ed^1)); @@ -21974,7 +22947,7 @@ void rd_hamming_symm_simple(rd_hamming_t *s, uint32_t st, uint32_t ed) // callba } for (cuttoff = step; cuttoff < 1.0; cuttoff += step) { - ncut = rd_hm_drop(s->g, s->ref, st, ed^1, cuttoff, b); + ncut = rd_hm_drop(s->g, s->ref, st, ed^1, cuttoff, 0, b); p = rd_hm_bub(s->g, s->ref, st, max_dist, b); if(p) { assert(b->S.a[0] == (ed^1)); @@ -21982,7 +22955,7 @@ void rd_hamming_symm_simple(rd_hamming_t *s, uint32_t st, uint32_t ed) // callba } if(!ncut) break; } - rd_hm_drop(s->g, s->ref, st, ed^1, 1024, b); + rd_hm_drop(s->g, s->ref, st, ed^1, 1024, 0, b); p = rd_hm_bub(s->g, s->ref, st, max_dist, b); if(p) { assert(b->S.a[0] == (ed^1)); @@ -21990,12 +22963,70 @@ void rd_hamming_symm_simple(rd_hamming_t *s, uint32_t st, uint32_t ed) // callba } } + +uint32_t rd_hamming_symm_simple0(buf_t *b, asg_t *ref, asg_t *g, uint32_t st, uint32_t ed, uint64_t max_dist, uint64_t *r_max_dist) // callback for kt_for() +{ + double step = 0.2, cuttoff; + uint32_t p, k, ncut; + p = rd_hm_bub(g, ref, st, max_dist, b); + if(p) { + assert(b->S.a[0] == (ed^1)); + if(r_max_dist) (*r_max_dist) = max_dist; + return 1; + } + ///recalculate max_dist + p = rd_hm_bub(ref, NULL, st, max_dist, b); + // if(!p) { + // fprintf(stderr, "[M::%s] st>>1::%u(st&1::%u), ed>>1::%u(ed&1::%u), max_dist::%lu\n", + // __func__, st>>1, st&1, ed>>1, ed&1, max_dist); + // } + assert(p); assert(b->S.a[0] == (ed^1)); + for (k = max_dist = 0; k < b->b.n; ++k) { + if(b->b.a[k]==st || b->b.a[k]==b->S.a[0]) continue; + max_dist += ref->seq[b->b.a[k]>>1].len; + } + max_dist += ref->seq[st>>1].len; + max_dist += ref->seq[b->S.a[0]>>1].len; + p = rd_hm_bub(g, ref, st, max_dist, b); + if(p) { + assert(b->S.a[0] == (ed^1)); + 
if(r_max_dist) (*r_max_dist) = max_dist; + return 1; + } + + ///drop inexact edges first + cuttoff = -1; + ncut = rd_hm_drop(g, ref, st, ed^1, cuttoff, 1, b); + p = rd_hm_bub(g, ref, st, max_dist, b); + if(p) { + assert(b->S.a[0] == (ed^1)); + if(r_max_dist) (*r_max_dist) = max_dist; + return 1; + } + + for (cuttoff = step; cuttoff < 1.0; cuttoff += step) { + ncut = rd_hm_drop(g, ref, st, ed^1, cuttoff, 0, b); + p = rd_hm_bub(g, ref, st, max_dist, b); + if(p) { + assert(b->S.a[0] == (ed^1)); + if(r_max_dist) (*r_max_dist) = max_dist; + return 1; + } + if(!ncut) break; + } + + rd_hm_drop(g, ref, st, ed^1, 1024, 0, b); + p = rd_hm_bub(g, ref, st, max_dist, b); + assert(p); assert(b->S.a[0] == (ed^1)); + if(r_max_dist) (*r_max_dist) = max_dist; + return 0; +} + void reduce_hamming_error_adv(ma_ug_t *iug, asg_t *sg, ma_hit_t_alloc* sources, ma_sub_t *coverage_cut, int max_hang, int min_ovlp, long long gap_fuzz, R_to_U *ru, bubble_type* bub) { double index_time = yak_realtime(); - ma_ug_t *ug = NULL; rd_hamming_t aux_t; memset((&aux_t), 0, sizeof(aux_t)); - ug = (iug)?(iug):(ma_ug_gen_primary(sg, PRIMARY_LABLE)); + ma_ug_t *ug = NULL; ug = (iug)?(iug):(ma_ug_gen_primary(sg, PRIMARY_LABLE)); uint8_t* vis_flag = NULL; CALLOC(vis_flag, sg->n_seq*2); uint32_t fix_bub = 0; asg_t *g = ug->g; uint32_t v, n_vtx = g->n_seq * 2, n_arc, n_arc_0 = sg->n_arc, nv, i; @@ -22059,8 +23090,13 @@ int max_hang, int min_ovlp, long long gap_fuzz, R_to_U *ru, bubble_type* bub) MALLOC(ig->seq, ig->n_seq); memcpy(ig->seq, sg->seq, (sizeof((*(ig->seq)))*ig->n_seq)); asg_cleanup(ig); asg_arc_del_trans_aux(ig, sg, vis_flag, gap_fuzz); - aux_t.n_thread = 1/**asm_opt.thread_num**/; CALLOC(aux_t.a, aux_t.n_thread); REALLOC(b.a, (ig->n_seq<<1)); memset(b.a, 0, sizeof((*(b.a)))*(ig->n_seq<<1)); + + for (i = 0; i < sv.n; i++) rd_hamming_symm_simple0(&b, sg, ig, sv.a[i]>>32, (uint32_t)(sv.a[i]), max_dist, NULL); + /** + rd_hamming_t aux_t; memset((&aux_t), 0, sizeof(aux_t)); + aux_t.n_thread = 1; // 
aux_t.n_thread = asm_opt.thread_num; + CALLOC(aux_t.a, aux_t.n_thread); for (i = 0; i < aux_t.n_thread; i++) aux_t.a[i].a = b.a; aux_t.g = ig; aux_t.ref = sg; aux_t.rr = &sv; aux_t.max_dist = max_dist; // print_debug_gfa(ug, sg, coverage_cut, "debug_hamming", sources, ru); @@ -22073,6 +23109,7 @@ int max_hang, int min_ovlp, long long gap_fuzz, R_to_U *ru, bubble_type* bub) free(aux_t.a[i].b.a); free(aux_t.a[i].e.a); } free(aux_t.a); + **/ } free(sv.a); free(vis_flag); @@ -22970,13 +24007,323 @@ uint64_t get_s_bub_pop_max_dist_advance(asg_t *g, buf_s_t *b) return mLen; } +void append_node_arcs(asg_t *des, asg_t *src, uint8_t *s, uint8_t se, uint32_t v) +{ + asg_arc_t *av, *za; uint32_t an, zn, k, n0, n1; + n0 = n1 = 0; + za = asg_arc_a(src, v); zn = asg_arc_n(src, v); + av = asg_arc_a(des, v); an = asg_arc_n(des, v); + ///set + for (k = 0; k < zn; k++) { + if(za[k].del) continue; + s[za[k].v] |= se; n0++; + } + + for (k = 0; k < an; k++) { + ///s[av[k].v]&se:: in the existing graph + if(s[av[k].v]&se) { + av[k].del = 0; n1++; + } + } + + ///reset + for (k = 0; k < zn; k++) { + if(za[k].del) continue; + if(s[za[k].v]&se) s[za[k].v] -= se; + } + if(!(n0 == n1)) { + fprintf(stderr, "[M::%s] n0::%u, n1::%u\n", __func__, n0, n1); + } + assert(n0 == n1); +} + + +static inline void asg_arc_rest(asg_t *des, asg_t *src, uint32_t v0, uint32_t w0, ma_ug_t *ug, kvec_asg_arc_t_warp *ae, ma_hit_t_alloc* src_e, ma_sub_t *cov, int32_t max_hang, int32_t min_ovlp, int32_t gap_fuzz, uint32_t *n_insert) +{ + uint32_t v, w, i, nv, rv, rw; asg_arc_t *av, *arc, t; + + v = v0; w = w0; + av = asg_arc_a(des, v); nv = asg_arc_n(des, v); + for (i = 0; i < nv; ++i) { + if (av[i].v == w) { + av[i].del = 0; break; + } + } + if(i < nv) { + v = w0^1; w = v0^1; + av = asg_arc_a(des, v); nv = asg_arc_n(des, v); + for (i = 0; i < nv; ++i) { + if (av[i].v == w) { + av[i].del = 0; break; + } + } + assert(i < nv); + return; + } + + ///replace a deleted arc + // fprintf(stderr, "[M::%s] replace\n", 
__func__); + v = v0; w = w0; + av = asg_arc_a(src, v); nv = asg_arc_n(src, v); + for (i = 0, arc = NULL; i < nv; ++i) { + if (av[i].v == w) { + av[i].del = 0; arc = &(av[i]); break; + } + } + assert(arc); + av = asg_arc_a(des, v); nv = asg_arc_n(des, v); + assert(nv); + for (i = 0; i < nv; ++i) assert(av[i].del); + for (i = 0; i < nv && av[i].ul < arc->ul; ++i); + if(i >= nv) i = nv - 1; av[i] = *arc; + + v = w0^1; w = v0^1; + av = asg_arc_a(src, v); nv = asg_arc_n(src, v); + for (i = 0, arc = NULL; i < nv; ++i) { + if (av[i].v == w) { + av[i].del = 0; arc = &(av[i]); break; + } + } + assert(arc); + av = asg_arc_a(des, v); nv = asg_arc_n(des, v); + assert(nv); + for (i = 0; i < nv; ++i) assert(av[i].del); + for (i = 0; i < nv && av[i].ul < arc->ul; ++i); + if(i >= nv) i = nv - 1; av[i] = *arc; + + rv = ((v0&1)?(ug->u.a[v0>>1].start^1):(ug->u.a[v0>>1].end^1)); + rw = ((w0&1)?(ug->u.a[w0>>1].end):(ug->u.a[w0>>1].start)); + assert(get_edge_from_source(src_e, cov, NULL, max_hang, min_ovlp, rv, rw, &t)); + kv_push(asg_arc_t, ae->a, t); + + assert(get_edge_from_source(src_e, cov, NULL, max_hang, min_ovlp, rw^1, rv^1, &t)); + kv_push(asg_arc_t, ae->a, t); + + (*n_insert) += 2; +} + +uint32_t bub_pop_merge(ma_ug_t *raw_ug, ma_ug_t *new_ug, uint32_t v0, uint32_t v1, uint64_t max_dist, buf_t *b, +uint32_t positive_flag, uint32_t negative_flag, hap_cov_t *cov, uint32_t is_update_chain, utg_trans_t *o, kvec_asg_arc_t_warp *ae, +ma_hit_t_alloc* src, ma_sub_t *sub, int32_t max_hang, int32_t min_ovlp, int32_t gap_fuzz, uint32_t *n_insert) +{ + ///do not pop bubble within new_ug; + uint32_t is_pop = asg_bub_pop1_primary_trio(new_ug->g, new_ug, v0, max_dist, b, positive_flag, negative_flag, 0, NULL, NULL, cov, is_update_chain, 0, o); + assert(is_pop); assert(b->S.a[0] == v1); + + ///b->S.a[0] is the sink of this bubble + uint32_t i, v, qn, tn, tmp_c, u; asg_arc_t *a; + asg_t *g = raw_ug->g; tmp_c = g->seq[b->S.a[0]>>1].c; + + ///assert(b->S.n == 1); + ///first remove all nodes in 
this bubble + for (i = 0; i < b->b.n; ++i) g->seq[b->b.a[i]>>1].c = ALTER_LABLE; + + + ///v is the sink of this bubble + v = b->S.a[0]; + ///recover node + do { + u = b->a[v].p; // u->v + /****************************may have hap bugs********************************/ + ////g->seq[v>>1].c = PRIMARY_LABLE; + g->seq[v>>1].c = HAP_LABLE; + /****************************may have hap bugs********************************/ + v = u; + } while (v != v0); + ///especially for unitig graph, don't label beg and sink node of a bubble as HAP_LABLE + ///since in unitig graph, a node may consist of a lot of reads + g->seq[b->S.a[0]>>1].c = tmp_c; + + ///remove all edges (self/reverse for each edge) in this bubble + for (i = 0; i < b->e.n; ++i) { + a = &(new_ug->g->arc[b->e.a[i]]);///note:: new_ug->g here + qn = a->ul>>33; + tn = a->v>>1; + if(g->seq[qn].c == ALTER_LABLE && g->seq[tn].c == ALTER_LABLE) continue; + ///remove this edge self + asg_arc_del(g, a->ul>>32, a->v, 1); + ///remove the reverse direction + asg_arc_del(g, a->v^1, a->ul>>32^1, 1); + } + + ///v is the sink of this bubble + v = b->S.a[0]; + ///recover node + do { + u = b->a[v].p; // u->v + g->seq[v>>1].del = 0; + asg_arc_rest(g, new_ug->g, u, v, raw_ug, ae, src, sub, max_hang, min_ovlp, gap_fuzz, n_insert); + v = u; + } while (v != v0); + return is_pop; +} + +uint64_t renew_phase_bubble(rd_hamming_fly_simp_t *pf, uint64_t v0, buf_t *b, ma_ug_t *ug, uint64_t max_dist, +uint32_t positive_flag, uint32_t negative_flag, hap_cov_t *cov, utg_trans_t *o, uint32_t is_update_chain) +{ + uint64_t v, k, i, v1 = b->S.a[0], is_update, is_pop = 0; ma_ug_t *fg = pf->fg; + uint8_t *s = pf->vs; asg32_v *bc = pf->srt; uint8_t sn = 1, se = 2; + bc->n = 0; kv_resize(uint32_t, (*bc), b->b.n); + assert((fg->u.a[v0>>1].len == ug->u.a[v0>>1].len) && (fg->u.a[v0>>1].n == ug->u.a[v0>>1].n)); + assert((fg->u.a[v1>>1].len == ug->u.a[v1>>1].len) && (fg->u.a[v1>>1].n == ug->u.a[v1>>1].n)); + ///b->S.a[0] is the sink of this bubble + for (i = 0; i 
< b->b.n; i++) { + v = b->b.a[i]; + if((v == v0) || (v == v1)) continue; + s[v] = sn; + kv_push(uint32_t, *bc, v); + assert((fg->u.a[v>>1].len == ug->u.a[v>>1].len) && (fg->u.a[v>>1].n == ug->u.a[v>>1].n)); + } + + asg_arc_t *av, *za, *ra; uint32_t an, zn, rn, ri; + v = v0; + av = asg_arc_a(fg->g, v); + an = asg_arc_n(fg->g, v); + for (k = 0; k < an; k++) av[k].del = 1; + fg->g->seq[v>>1].c = ug->g->seq[v>>1].c; + + + v = v1^1; + av = asg_arc_a(fg->g, v); + an = asg_arc_n(fg->g, v); + for (k = 0; k < an; k++) av[k].del = 1; + fg->g->seq[v>>1].c = ug->g->seq[v>>1].c; + + + for (i = 0; i < bc->n; i++) { + v = bc->a[i]; + av = asg_arc_a(fg->g, v); an = asg_arc_n(fg->g, v); + for (k = 0; k < an; k++) av[k].del = 1; + + v ^= 1; + av = asg_arc_a(fg->g, v); an = asg_arc_n(fg->g, v); + for (k = 0; k < an; k++) av[k].del = 1; + + fg->g->seq[v>>1].c = ug->g->seq[v>>1].c; + } + + + + + for (i = 0; i < bc->n; i++) { + v = bc->a[i]; + za = asg_arc_a(ug->g, v); zn = asg_arc_n(ug->g, v); + av = asg_arc_a(fg->g, v); an = asg_arc_n(fg->g, v); + + ///set + for (k = 0; k < zn; k++) { + if((za[k].del) || (!s[za[k].v])) continue; + s[za[k].v] |= se; + } + + + for (k = 0; k < an; k++) { + ///s[av[k].v]&se:: in the existing graph + if((!s[av[k].v]) || (s[av[k].v]&se)) continue; ///in the existing graph + if((av[k].v) == (v>>1)) continue; + av[k].del = 0; + ra = asg_arc_a(fg->g, (av[k].v^1)); rn = asg_arc_n(fg->g, (av[k].v^1)); + for (ri = 0; ri < rn; ri++) { + if(ra[ri].v == ((av[k].ul>>32)^1)) { + ra[ri].del = 0; break; + } + } + assert(ri < rn); + } + + ///reset + for (k = 0; k < zn; k++) { + if((za[k].del) || (!s[za[k].v])) continue; + if(s[za[k].v]&se) s[za[k].v] -= se; + } + } + + + is_update = rd_hamming_symm_simple0(b, ug->g, fg->g, v0, v1^1, max_dist, &max_dist); + // fprintf(stderr, "[M::%s] is_update::%lu\n", __func__, is_update); + if(is_update) { + for (i = 0; i < bc->n; i++) { + append_node_arcs(fg->g, ug->g, s, se, bc->a[i]); + append_node_arcs(fg->g, ug->g, s, se, 
bc->a[i]^1); + } + append_node_arcs(fg->g, ug->g, s, se, v0); + append_node_arcs(fg->g, ug->g, s, se, v1^1); + is_pop = bub_pop_merge(ug, fg, v0, v1, max_dist, b, positive_flag, negative_flag, cov, is_update_chain, o, pf->ae, pf->src, pf->cov, pf->max_hang, pf->min_ovlp, pf->gap_fuzz, &(pf->n_insert)); + } else { + is_pop = asg_bub_pop1_primary_trio(ug->g, ug, v0, max_dist, b, positive_flag, negative_flag, 1, NULL, NULL, cov, is_update_chain, 0, o); + } + + + + for (i = 0; i < bc->n; i++) s[bc->a[i]] = 0; + ///reset + v = v0; + av = asg_arc_a(fg->g, v); + an = asg_arc_n(fg->g, v); + for (k = 0; k < an; k++) av[k].del = 0; + fg->g->seq[v>>1].c = PRIMARY_LABLE; + + + v = v1^1; + av = asg_arc_a(fg->g, v); + an = asg_arc_n(fg->g, v); + for (k = 0; k < an; k++) av[k].del = 0; + fg->g->seq[v>>1].c = PRIMARY_LABLE; + + + for (i = 0; i < bc->n; i++) { + v = bc->a[i]; + av = asg_arc_a(fg->g, v); + an = asg_arc_n(fg->g, v); + for (k = 0; k < an; k++) av[k].del = 0; + + v ^= 1; + av = asg_arc_a(fg->g, v); + an = asg_arc_n(fg->g, v); + for (k = 0; k < an; k++) av[k].del = 0; + + fg->g->seq[v>>1].c = PRIMARY_LABLE; + } + return is_pop; +} + +uint64_t refine_bubble_popping(ma_ug_t *ug, buf_t *b, uint32_t v0, uint64_t max_dist, uint32_t positive_flag, uint32_t negative_flag, hap_cov_t *cov, utg_trans_t *o, uint32_t is_update_chain, rd_hamming_fly_simp_t *pf) +{ + // fprintf(stderr, "[M::%s]\n", __func__); + if(!asg_bub_pop1_primary_trio(ug->g, ug, v0, max_dist, b, positive_flag, negative_flag, 0, NULL, NULL, NULL, 0, 0, NULL)) return 0; + uint32_t non_positive_flag = (uint32_t)-1, v, u, k, rId, pn, npn; + if(positive_flag == FATHER) non_positive_flag = MOTHER; + if(positive_flag == MOTHER) non_positive_flag = FATHER; + ma_utg_t* p = NULL; + ///b->S.a[0] is the sink of this bubble + ///v is the sink of this bubble + v = b->S.a[0]; pn = npn = 0; + ///scan node + do { + u = b->a[v].p; // u->v + if(v != b->S.a[0]) { + p = &(ug->u.a[v>>1]); + for (k = 0; k < p->n; k++) { + rId = 
p->a[k]>>33; + if(R_INF.trio_flag[rId] == positive_flag) pn++; + if(R_INF.trio_flag[rId] == non_positive_flag) npn++; + } + } + v = u; + } while (v != v0); + // fprintf(stderr, "[M::%s] pn::%u, npn::%u\n", __func__, pn, npn); + ///debug + if((npn <= 0) || ((npn <= ((npn+pn)*0.05)) && (npn <= 64))) {///phasing is ok + return asg_bub_pop1_primary_trio(ug->g, ug, v0, max_dist, b, positive_flag, negative_flag, 1, NULL, NULL, cov, is_update_chain, 0, o); + } + return renew_phase_bubble(pf, v0, b, ug, max_dist, positive_flag, negative_flag, cov, o, is_update_chain); +} // pop bubbles -int asg_pop_bubble_primary_trio(ma_ug_t *ug, uint64_t* i_max_dist, uint32_t positive_flag, uint32_t negative_flag, hap_cov_t *cov, utg_trans_t *o, uint32_t is_update_chain) +int asg_pop_bubble_primary_trio(ma_ug_t *ug, uint64_t* i_max_dist, uint32_t positive_flag, uint32_t negative_flag, hap_cov_t *cov, utg_trans_t *o, uint32_t is_update_chain, rd_hamming_fly_simp_t *p) { - asg_t *g = ug->g; + asg_t *g = ug->g; uint64_t n_pop = 0, max_dist; uint32_t v, n_vtx = g->n_seq * 2, n_arc, nv, i; - uint64_t n_pop = 0, max_dist; asg_arc_t *av = NULL; buf_t b; if (!g->is_symm) asg_symm(g); @@ -23023,8 +24370,13 @@ int asg_pop_bubble_primary_trio(ma_ug_t *ug, uint64_t* i_max_dist, uint32_t posi ///some edges could be deleted for (i = n_arc = 0; i < nv; ++i) // asg_bub_pop1() may delete some edges/arcs if (!av[i].del) ++n_arc; - if (n_arc > 1) - n_pop += asg_bub_pop1_primary_trio(ug->g, ug, v, max_dist, &b, positive_flag, negative_flag, 1, NULL, NULL, cov, is_update_chain, 0, o); + if (n_arc > 1) { + if(p){ + n_pop += refine_bubble_popping(ug, &b, v, max_dist, positive_flag, negative_flag, cov, o, is_update_chain, p); + } else { + n_pop += asg_bub_pop1_primary_trio(ug->g, ug, v, max_dist, &b, positive_flag, negative_flag, 1, NULL, NULL, cov, is_update_chain, 0, o); + } + } } if(VERBOSE >= 1) @@ -28714,7 +30066,7 @@ R_to_U* ruIndex, int max_hang, int min_ovlp, const ug_opt_t *uopt) if(bubble_dist > 0) { 
- asg_pop_bubble_primary_trio(ug, &bubble_dist, (uint32_t)-1, DROP, NULL, NULL, 0); + asg_pop_bubble_primary_trio(ug, &bubble_dist, (uint32_t)-1, DROP, NULL, NULL, 0, NULL); delete_useless_nodes(&ug); renew_utg(&ug, sg, &new_rtg_edges); } @@ -35043,19 +36395,21 @@ ma_sub_t **coverage_cut_ptr, int debug_g) { if(asm_opt.flag & HA_F_PARTITION) asm_opt.flag -= HA_F_PARTITION; output_poly_trio(sg, coverage_cut, o_file, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, ruIndex, - 0.05, 0.9, max_hang_length, mini_overlap_length, 0, &b_mask_t, asm_opt.polyploidy); + 0.05, 0.9, max_hang_length, mini_overlap_length, gap_fuzz, 0, &b_mask_t, asm_opt.polyploidy); } else if (ha_opt_triobin(&asm_opt) && ha_opt_hic(&asm_opt)) { if(asm_opt.flag & HA_F_PARTITION) asm_opt.flag -= HA_F_PARTITION; benchmark_hic_graph(sg, coverage_cut, o_file, sources, reverse_sources, (asm_opt.max_short_tip*2), 0.15, 3, - ruIndex, 0.05, 0.9, max_hang_length, mini_overlap_length, &b_mask_t); + ruIndex, 0.05, 0.9, max_hang_length, mini_overlap_length, gap_fuzz, &b_mask_t); } else if (ha_opt_triobin(&asm_opt)) { if(asm_opt.flag & HA_F_PARTITION) asm_opt.flag -= HA_F_PARTITION; - output_trio_graph(sg, coverage_cut, o_file, sources, reverse_sources, (asm_opt.max_short_tip*2), - 0.15, 3, ruIndex, 0.05, 0.9, max_hang_length, mini_overlap_length, 0, gap_fuzz, &uopt, &b_mask_t); + // output_trio_graph(sg, coverage_cut, o_file, sources, reverse_sources, (asm_opt.max_short_tip*2), + // 0.15, 3, ruIndex, 0.05, 0.9, max_hang_length, mini_overlap_length, 0, gap_fuzz, &uopt, &b_mask_t); + output_trio_graph_joint(sg, coverage_cut, o_file, sources, reverse_sources, (asm_opt.max_short_tip*2), + 0.15, 3, ruIndex, 0.05, 0.9, max_hang_length, mini_overlap_length, gap_fuzz, &b_mask_t, NULL, NULL); } else if(ha_opt_hic(&asm_opt)) { @@ -35074,7 +36428,7 @@ ma_sub_t **coverage_cut_ptr, int debug_g) else if((asm_opt.flag & HA_F_PARTITION) && (asm_opt.purge_level_primary > 0)) { output_bp_graph(sg, coverage_cut, 
o_file, sources, reverse_sources, (asm_opt.max_short_tip*2), - 0.15, 3, ruIndex, 0.05, 0.9, max_hang_length, mini_overlap_length, &b_mask_t/**, &uopt**/); + 0.15, 3, ruIndex, 0.05, 0.9, max_hang_length, mini_overlap_length, &b_mask_t, gap_fuzz, &uopt); } else { diff --git a/Additional_src/Modified_hifiasm/Overlaps.h b/Additional_src/Modified_hifiasm/Overlaps.h index 65d116e..955e528 100644 --- a/Additional_src/Modified_hifiasm/Overlaps.h +++ b/Additional_src/Modified_hifiasm/Overlaps.h @@ -1066,7 +1066,7 @@ ma_ug_t* copy_untig_graph(ma_ug_t *src); ma_ug_t* output_trio_unitig_graph(asg_t *sg, ma_sub_t* coverage_cut, char* output_file_name, uint8_t flag, ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, long long tipsLen, float tip_drop_ratio, long long stops_threshold, R_to_U* ruIndex, -float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, int is_bench, bub_label_t* b_mask_t, char *f_prefix, uint8_t *kpt_buf, kvec_asg_arc_t_warp *r_edges); +float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, int gap_fuzz, int is_bench, bub_label_t* b_mask_t, char *f_prefix, uint8_t *kpt_buf, kvec_asg_arc_t_warp *r_edges); asg_t* copy_read_graph(asg_t *src); ma_ug_t *ma_ug_gen(asg_t *g); void ma_ug_destroy(ma_ug_t *ug); @@ -1135,7 +1135,7 @@ void adjust_utg_by_trio(ma_ug_t **ug, asg_t* read_g, uint8_t flag, float drop_ra ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, ma_sub_t* coverage_cut, long long tipsLen, float tip_drop_ratio, long long stops_threshold, R_to_U* ruIndex, float chimeric_rate, float drop_ratio, int max_hang, int min_ovlp, -kvec_asg_arc_t_warp* new_rtg_edges, bub_label_t* b_mask_t); +int gap_fuzz, kvec_asg_arc_t_warp* new_rtg_edges, bub_label_t* b_mask_t); uint32_t cmp_untig_graph(ma_ug_t *src, ma_ug_t *dest); void reduce_hamming_error(asg_t *sg, ma_hit_t_alloc* sources, ma_sub_t *coverage_cut, int max_hang, int min_ovlp, long long gap_fuzz); @@ -1233,5 +1233,7 @@ void destory_ug_rid_cov_t(ug_rid_cov_t *p); uint32_t 
append_cov_line_ug_rid_cov_t(uint64_t uid, uint64_t *qcc, u_trans_t *p, ug_rid_cov_t *idx, uint64_t hom_cut, double cut_rate); uint64_t infer_mmhap_copy(ma_ug_t *ug, asg_t *sg, ma_hit_t_alloc *src, uint8_t *ff, uint64_t uid, uint64_t het_cov, uint64_t n_hap); uint64_t trans_sec_cut0(kv_u_trans_t *ta, asg64_v *srt, uint32_t id, double sec_rate, uint64_t bd, ma_ug_t *ug); +void clean_u_trans_t_idx_filter_mmhap_adv(kv_u_trans_t *ta, ma_ug_t *ug, asg_t *read_g, ma_hit_t_alloc* src, ug_rid_cov_t *in); +void gen_ug_rid_cov_t_by_ovlp(kv_u_trans_t *ta, ug_rid_cov_t *cc); #endif diff --git a/Additional_src/Modified_hifiasm/README.md b/Additional_src/Modified_hifiasm/README.md index c081c5a..7b3337d 100644 --- a/Additional_src/Modified_hifiasm/README.md +++ b/Additional_src/Modified_hifiasm/README.md @@ -22,6 +22,9 @@ hifiasm -o HG002.asm --h1 read1.fq.gz --h2 read2.fq.gz HG002-HiFi.fq.gz yak count -b37 -t16 -o pat.yak <(cat pat_1.fq.gz pat_2.fq.gz) <(cat pat_1.fq.gz pat_2.fq.gz) yak count -b37 -t16 -o mat.yak <(cat mat_1.fq.gz mat_2.fq.gz) <(cat mat_1.fq.gz mat_2.fq.gz) hifiasm -o HG002.asm -t32 -1 pat.yak -2 mat.yak HG002-HiFi.fa.gz + +# Single-sample telomere-to-telomere assembly with HiFi, ultralong and Hi-C reads +hifiasm -o HG002.asm --h1 read1.fq.gz --h2 read2.fq.gz --ul ul.fq.gz HG002-HiFi.fq.gz ``` See [tutorial][tutorial] for more details. @@ -43,15 +46,12 @@ See [tutorial][tutorial] for more details. ## Introduction -Hifiasm is a fast haplotype-resolved de novo assembler for PacBio HiFi reads. -It can assemble a human genome in several hours and assemble a ~30Gb California -redwood genome in a few days. Hifiasm emits partially phased assemblies of -quality competitive with the best assemblers. Given parental short reads or -Hi-C data, it produces arguably the best haplotype-resolved assemblies so far. +Hifiasm is a fast haplotype-resolved de novo assembler initially designed for PacBio HiFi reads. 
+Its latest release could support the telomere-to-telomere assembly by utilizing ultralong Oxford Nanopore reads. Hifiasm produces arguably the best single-sample telomere-to-telomere assemblies combining HiFi, ultralong and Hi-C reads, and it is one of the best haplotype-resolved assemblers for the trio-binning assembly given parental short reads. For a human genome, hifiasm can produce the telomere-to-telomere assembly in one day. ## Why Hifiasm? -* Hifiasm delivers high-quality assemblies. It tends to generate longer contigs +* Hifiasm delivers high-quality telomere-to-telomere assemblies. It tends to generate longer contigs and resolve more segmental duplications than other assemblers. * Given Hi-C reads or short reads from the parents, hifiasm can produce overall the best @@ -146,11 +146,18 @@ The second command line will run much faster than the first. ### Ultra-long ONT integration -Hifiasm could integrate ultra-long ONT reads to improve the assembly quality: +Hifiasm could integrate ultra-long ONT reads to produce the telomere-to-telomere assembly: ```sh hifiasm -o NA12878.asm -t32 --ul ul.fq.gz HiFi-reads.fq.gz ``` -Please note that this mode is not stable right now. We have only tested with >=100kb UL reads. 
+For the single-sample telomere-to-telomere assembly with Hi-C reads: +```sh +hifiasm -o NA12878.asm -t32 --ul ul.fq.gz --h1 read1.fq.gz --h2 read2.fq.gz HiFi-reads.fq.gz +``` +For the trio-binning telomere-to-telomere assembly: +```sh +hifiasm -o NA12878.asm -t32 --ul ul.fq.gz -1 pat.yak -2 mat.yak HiFi-reads.fq.gz +``` ### Output files diff --git a/Additional_src/Modified_hifiasm/gfa_ut.cpp b/Additional_src/Modified_hifiasm/gfa_ut.cpp index 89e2860..dbc04a2 100644 --- a/Additional_src/Modified_hifiasm/gfa_ut.cpp +++ b/Additional_src/Modified_hifiasm/gfa_ut.cpp @@ -7365,7 +7365,7 @@ void rebuid_idx(ul_resolve_t *uidx) init_ul_str_idx_t(uidx); } -void shrink_1b(ma_ug_t *ug, uc_block_t *z, uint32_t is_forward) +void shrink_1b(ma_ug_t *ug, uc_block_t *z, uc_block_t *lim, uint32_t is_forward) { if(z->ts != 0 || z->te != ug->g->seq[z->hid].len) return; uc_block_t bc = *z; @@ -7377,12 +7377,14 @@ void shrink_1b(ma_ug_t *ug, uc_block_t *z, uint32_t is_forward) } else { z->te -= 1; z->qs += off; } + if((lim) && (!((z->qs <= lim->qs) && (z->qe <= lim->qe)))) *z = bc; } else { if((!z->rev)) { z->te -= 1; z->qe -= off; } else { z->ts += 1; z->qe -= off; } + if((lim) && (!((z->qs >= lim->qs) && (z->qe >= lim->qe)))) *z = bc; } if((ugl_cover_check(bc.ts, bc.te, &(ug->u.a[bc.hid]))) && (!ugl_cover_check(z->ts, z->te, &(ug->u.a[z->hid])))) { @@ -7436,7 +7438,7 @@ void renew_ul_vec_t(ul_vec_t *x, ma_ug_t *ug) void shrink_ul0(all_ul_t *uls, ul_str_t *str, uint64_t id, integer_t *buf, ma_ug_t *ug) { - uint32_t k, c_k, p_k, cv, pv, bl, i; uc_block_t *xi; buf->u.n = 0; nid_t *np = NULL; + uint32_t k, c_k, p_k, cv, pv, bl, i; uc_block_t *xi, *yi; buf->u.n = 0; nid_t *np = NULL; asg_arc_t *av; uint32_t nv, s, e, m, d, mm, is_conn; uint64_t *z; ul_vec_t *x; if(str->cn < 2) return; for (k = 0, bl = 0, c_k = p_k = pv = (uint32_t)-1; k < str->cn; k++) { @@ -7464,6 +7466,11 @@ void shrink_ul0(all_ul_t *uls, ul_str_t *str, uint64_t id, integer_t *buf, ma_ug is_conn = 1; } } + if(is_conn) { + 
is_conn = 0; assert(k); + yi = &(uls->a[id].bb.a[str->a[k-1]>>32]); + if((xi->qs >= yi->qs) && (xi->qe >= yi->qe)) is_conn = 1; + } if(is_conn) { bl++; } else { @@ -7514,10 +7521,9 @@ void shrink_ul0(all_ul_t *uls, ul_str_t *str, uint64_t id, integer_t *buf, ma_ug x->bb.n = m; assert(x->bb.n > 1); - - - shrink_1b(ug, &(x->bb.a[0]), 1); - shrink_1b(ug, &(x->bb.a[x->bb.n-1]), 0); + shrink_1b(ug, &(x->bb.a[0]), ((x->bb.n>=2)?&(x->bb.a[1]):(NULL)), 1); + shrink_1b(ug, &(x->bb.a[x->bb.n-1]), ((x->bb.n>=2)?&(x->bb.a[x->bb.n-2]):(NULL)), 0); + d = x->bb.a[0].qs; for (k = 0; k < x->bb.n; k++) { x->bb.a[k].qs -= d; x->bb.a[k].qe -= d; @@ -7546,8 +7552,8 @@ void shrink_ul0(all_ul_t *uls, ul_str_t *str, uint64_t id, integer_t *buf, ma_ug } x->bb.n = m; assert(x->bb.n > 1); - shrink_1b(ug, &(x->bb.a[0]), 1); - shrink_1b(ug, &(x->bb.a[x->bb.n-1]), 0); + shrink_1b(ug, &(x->bb.a[0]), ((x->bb.n>=2)?&(x->bb.a[1]):(NULL)), 1); + shrink_1b(ug, &(x->bb.a[x->bb.n-1]), ((x->bb.n>=2)?&(x->bb.a[x->bb.n-2]):(NULL)), 0); d = x->bb.a[0].qs; for (k = 0; k < x->bb.n; k++) { x->bb.a[k].qs -= d; x->bb.a[k].qe -= d; @@ -15686,6 +15692,9 @@ void u2g_hybrid_clean(ul_resolve_t *uidx, ulg_opt_t *ulopt, usg_t *ng, asg64_v * // prt_usg_t(uidx, ng, sb); // usg_arc_cut_length(ng, b, ub, mm_tip>>1, drop, ulopt->is_trio, 1, NULL); usg_bub_clean(ng, &bb, b, ub, mm_tip>>1, drop, 1, bs, f); + // fprintf(stderr, "-1bub-[M::%s::] i::%ld, drop::%f\n", __func__, i, drop); + // sprintf(sb, "ng_ss::%ld_i::%ld_drop::%f_b::bub", ss, i, drop); + // prt_usg_t(uidx, ng, sb); usg_arc_cut_srt_length(ng, b, ub, mm_tip>>1, drop, ulopt->is_trio, 1, NULL, bs); // fprintf(stderr, "-1-[M::%s::] i::%ld, drop::%f\n", __func__, i, drop); // sprintf(sb, "ng_ss::%ld_i::%ld_drop::%f_b", ss, i, drop); @@ -16580,7 +16589,7 @@ ma_ug_t* output_trio_unitig_graph_ul(ug_opt_t *uopt, ul_resolve_t *uidx, char* o adjust_utg_by_trio(&ug, uidx->sg, flag, TRIO_THRES, uopt->sources, uopt->reverse_sources, uopt->coverage_cut, uopt->tipsLen, 
uopt->tip_drop_ratio, uopt->stops_threshold, uopt->ruIndex, - uopt->chimeric_rate, uopt->drop_ratio, uopt->max_hang, uopt->min_ovlp, &ne, uopt->b_mask_t); + uopt->chimeric_rate, uopt->drop_ratio, uopt->max_hang, uopt->min_ovlp, uopt->gap_fuzz, &ne, uopt->b_mask_t); // if(asm_opt.b_low_cov > 0) { // break_ug_contig(&ug, uidx->sg, &R_INF, uopt->coverage_cut, uopt->sources, uopt->ruIndex, &ne, diff --git a/Additional_src/Modified_hifiasm/horder.cpp b/Additional_src/Modified_hifiasm/horder.cpp index f5faa44..7a6353f 100644 --- a/Additional_src/Modified_hifiasm/horder.cpp +++ b/Additional_src/Modified_hifiasm/horder.cpp @@ -781,7 +781,7 @@ ma_ug_t* get_trio_unitig_graph(asg_t *sg, uint8_t flag, ug_opt_t *opt) adjust_utg_by_trio(&ug, sg, flag, TRIO_THRES, opt->sources, opt->reverse_sources, opt->coverage_cut, opt->tipsLen, opt->tip_drop_ratio, opt->stops_threshold, opt->ruIndex, opt->chimeric_rate, opt->drop_ratio, opt->max_hang, opt->min_ovlp, - &new_rtg_edges, opt->b_mask_t); + opt->gap_fuzz, &new_rtg_edges, opt->b_mask_t); kv_destroy(new_rtg_edges.a); return ug; diff --git a/Additional_src/Modified_hifiasm/inter.cpp b/Additional_src/Modified_hifiasm/inter.cpp index 2cf8218..b093f6a 100644 --- a/Additional_src/Modified_hifiasm/inter.cpp +++ b/Additional_src/Modified_hifiasm/inter.cpp @@ -10434,6 +10434,202 @@ static void worker_for_trans_ovlp_mmhap_adv(void *data, long i, int tid) // call s->free_cnt[tid]++; } +uint32_t tranfor_ovlp(u_trans_t *qovlp, u_trans_t *tovlp, asg_t *g, ul_ov_t *res, uint32_t adjust_rev) +{ + int64_t os, oe, s_shift, e_shift, tt, qs, qe, ts, te; + os = MAX(qovlp->ts, tovlp->qs); + oe = MIN(qovlp->te, tovlp->qe); + if(oe <= os) return 0; + + ///[os, oe) -> qovlp->t* + s_shift = get_offset_adjust(os-qovlp->ts, qovlp->te-qovlp->ts, qovlp->qe-qovlp->qs); + e_shift = get_offset_adjust(qovlp->te-oe, qovlp->te-qovlp->ts, qovlp->qe-qovlp->qs); + if(qovlp->rev) { + tt = s_shift; s_shift = e_shift; e_shift = tt; + } + qs = qovlp->qs+s_shift; qe = 
((int64_t)qovlp->qe)-e_shift; + if(qs >= qe) return 0; + + ///[os, oe) -> tovlp->q* + s_shift = get_offset_adjust(os-tovlp->qs, tovlp->qe-tovlp->qs, tovlp->te-tovlp->ts); + e_shift = get_offset_adjust(tovlp->qe-oe, tovlp->qe-tovlp->qs, tovlp->te-tovlp->ts); + if(tovlp->rev) { + tt = s_shift; s_shift = e_shift; e_shift = tt; + } + ts = tovlp->ts+s_shift; te = ((int64_t)tovlp->te)-e_shift; + if(ts >= te) return 0; + + memset(res, 0, sizeof(*res)); + res->qn = qovlp->qn; res->qs = qs; res->qe = qe; + res->tn = tovlp->tn; res->ts = ts; res->te = te; + res->rev = ((qovlp->rev == tovlp->rev)?0:1); + if(adjust_rev && res->rev) {///for linear chaining + res->ts = g->seq[res->tn].len - te; + res->te = g->seq[res->tn].len - ts; + } + return 1; +} + +uint32_t rescue_adject_ovlp(asg_t *g, uint32_t id, kv_u_trans_t *ta, kv_ul_ov_t *out, st_mt_t *buf) +{ + u_trans_t *a, *b; ul_ov_t rr; uint64_t a_n, b_n, k, l, i, z, m; + a = u_trans_a((*ta), id); a_n = u_trans_n((*ta), id); + for (i = out->n = buf->n = 0; i < a_n; i++) { + b = u_trans_a((*ta), a[i].tn); b_n = u_trans_n((*ta), a[i].tn); + z = a[i].tn; z <<= 32; kv_push(uint64_t, *buf, z); + for (k = 0; k < b_n; k++) { + if(b[k].tn == id) continue; + if(!tranfor_ovlp(&(a[i]), &(b[k]), g, &rr, 1)) continue; + z = rr.tn; z <<= 32; z |= out->n; z |= ((uint64_t)0x80000000); + rr.tn <<= 1; rr.tn |= rr.rev; kv_push(ul_ov_t, *out, rr); + } + } + if(out->n == 0) return 1; + + radix_sort_gfa64(buf->a, buf->a + buf->n); + for (k = 1, l = m = 0; k <= buf->n; k++) { + if(k == buf->n || (buf->a[l]>>32)!=(buf->a[k]>>32)) { + if((k - l > 1) && (!(buf->a[l]&((uint64_t)0x80000000)))) {///overlap within bck + for (z = l; z < k; z++) { + if(buf->a[z]&((uint64_t)0x80000000)) { + out->a[(uint32_t)(buf->a[z]-((uint64_t)0x80000000))].tn = (uint32_t)-1; + m++; + } + } + } + l = k; + } + } + + if(m) { + for (k = m = 0; k < out->n; k++) { + if(out->a[k].tn == (uint32_t)-1) continue; + out->a[m++] = out->a[k]; + } + out->n = m; + } + if(out->n == 0) return 
1; + + radix_sort_ul_ov_srt_tn(out->a, out->a+out->n); + for (k = 0; k < out->n; k++) out->a[k].tn >>= 1; + + return 0; +} + +/** +uint64_t gen_trans_chain_mmhap(ug_trans_t *s, uint64_t rid, ha_ovec_buf_t *b, kv_ul_ov_t *bl, char *seq, uint64_t len, +double err, double bw) +{ + uint64_t cnt = ((s->idx_n.a[rid+1]-s->idx_n.a[rid])), ol_h = 0, pass_aln = 0; + uint32_t high_occ = asm_opt.polyploidy + 1; overlap_region *aux_o = NULL; + ///note: high_occ is different + ug_map_lchain(b->abl, rid, seq, len, s->w, s->k, &(s->udb), &b->olist, &b->clist, bw, bw, + s->max_n_chain, 1, NULL, &(b->tmp_region), NULL, &(b->sp), &high_occ, NULL, 0, 1, 0.2, 3, + s->is_HPC, s->idx_a.a + s->idx_n.a[rid], cnt, s->srt_a.a, s->srt_a.n, s->mini_cut, s->chain_cut, NULL); + // if(rid == 57) { + // fprintf(stderr, "-1-[M::%s] utg%.6lu%c, rid::%ld, b->olist->length::%lu\n", + // __func__, rid+1, "lc"[s->ug->u.a[rid].circ], rid, b->olist.length); + // } + ///remove candidate chains that have been calculated + if(!fi) backward_dedup_ol(rid, bl, &(b->sp), &b->olist);///it is ok + // if(rid == 57) { + // fprintf(stderr, "-2-[M::%s] utg%.6lu%c, rid::%ld, b->olist->length::%lu\n", + // __func__, rid+1, "lc"[s->ug->u.a[rid].circ], rid, b->olist.length); + // } + filter_by_reliable_ovlp_mmhap_adv(rid, s->filter, &(b->sp), &b->olist, &(s->udb), s->sec_cutoff, 1, 1, s->ccov, &ol_h); + clear_Cigar_record(&b->cigar1); clear_Round2_alignment(&b->round2); + if(!fi) ol_h = 0; + + // if(rid == 57) { + // fprintf(stderr, "-3-[M::%s] utg%.6lu%c, rid::%ld, b->olist->length::%lu\n", + // __func__, rid+1, "lc"[s->ug->u.a[rid].circ], rid, b->olist.length); + // } + + ol_h = split_ug_lalign(ol_h, &b->olist, err_high, err_low, + &b->clist, &(s->udb), s->uopt, seq, len, &b->self_read, &b->ovlp_read, + &b->correct, &b->exz, aux_o, rid, s->k, s->chain_cut, NULL); + + // if(rid == 57) { + // fprintf(stderr, "-4-[M::%s] utg%.6lu%c, rid::%ld, b->olist->length::%lu\n", + // __func__, rid+1, "lc"[s->ug->u.a[rid].circ], rid, 
b->olist.length); + // } + + aux_o = gen_aux_ovlp(&b->olist);///must be here + + // if(rid == 57) { + // fprintf(stderr, "-5-[M::%s] utg%.6lu%c, rid::%ld, b->olist->length::%lu\n", + // __func__, rid+1, "lc"[s->ug->u.a[rid].circ], rid, b->olist.length); + // } + + ol_h = split_ug_lalign(ol_h, &b->olist, err_high, err_low, + &b->clist, &(s->udb), s->uopt, seq, len, &b->self_read, &b->ovlp_read, + &b->correct, &b->exz, aux_o, rid, s->k, s->chain_cut, NULL); + + // if(rid == 57) { + // fprintf(stderr, "-6-[M::%s] utg%.6lu%c, rid::%ld, b->olist->length::%lu\n", + // __func__, rid+1, "lc"[s->ug->u.a[rid].circ], rid, b->olist.length); + // } + + if(fi) {///first round + pass_aln = test_het_aln_mmhap(rid, s->ccov, u_trans_a((*(s->filter)), rid), u_trans_n((*(s->filter)), rid), &b->olist, &(b->sp)); + push_ul_ov_t(&(s->udb), u_trans_a((*(s->filter)), rid), u_trans_n((*(s->filter)), rid), rid, &(b->sp), &b->olist, len, pass_aln, err_high, bl); + // fprintf(stderr, "-1-[M::%s] utg%.6lu%c, rid::%lu, pass_aln::%lu\n", + // __func__, rid+1, "lc"[s->ug->u.a[rid].circ], rid, pass_aln); + } else {///second round + push_ul_ov_t(&(s->udb), NULL, 0, rid, &(b->sp), &b->olist, len, 0, err_high, bl); + remove_trans_ovlp_connect(s->udb.ug, rid, bl); + } + return pass_aln; +} +**/ + + + +static void worker_for_trans_chain_mmhap_adv(void *data, long i, int tid) // callback for kt_for() +{ + ug_trans_t *s = (ug_trans_t*)data; + ha_ovec_buf_t *b = s->hab[tid]; kv_ul_ov_t *bl = &(s->ll[tid].tk); + uint32_t high_occ = asm_opt.polyploidy + 1; uint64_t cnt; + char *seq = s->ug->u.a[i].s; int64_t len = s->ug->u.a[i].len; + if((!s->is_ovlp) && (s->is_cnt)) s->idx_n.a[i] = 0; + if(s->ug->g->seq[i].del) return; + if(is_mmhom_node(s->ccov->cov.a+s->ccov->idx[i], &(s->ug->u.a[i]), s->ccov->rg, s->ccov->hom_min, 0.9)) return; + // asprintf(&as, "\n[M::%s] rid::%ld, len::%lu, name::%.*s\n", __func__, s->id+i, s->len[i], (int32_t)UL_INF.nid.a[s->id+i].n, UL_INF.nid.a[s->id+i].a); + // 
push_vlog(&(overall_zdbg->a[s->id+i]), as); free(as); as = NULL; + // if(rescue_adject_ovlp(s->ug->g, i, s->filter, &(s->ll[tid].lo))) return; + + // gen_trans_chain_mmhap(s, i, b, bl, seq, len, 0.8, 0.8); + + if(!s->is_ovlp) { + if(s->is_cnt) { + s->idx_n.a[i] = ug_map_lchain(b->abl, i, seq, len, s->w, s->k, &(s->udb), NULL, NULL, s->bw_thres, s->bw_thres_double, + s->max_n_chain, 1, NULL, &(b->tmp_region), NULL, &(b->sp), &high_occ, NULL, 0, 1, 0.2, 3, s->is_HPC, NULL, 0, NULL, 0, s->mini_cut, s->chain_cut, NULL); + } else { + cnt = ug_map_lchain(b->abl, i, seq, len, s->w, s->k, &(s->udb), NULL, NULL, s->bw_thres, s->bw_thres_double, + s->max_n_chain, 1, NULL, &(b->tmp_region), NULL, &(b->sp), &high_occ, NULL, 0, 1, 0.2, 3, s->is_HPC, s->idx_a.a + s->idx_n.a[i], 0, NULL, 0, s->mini_cut, s->chain_cut, NULL); + assert(cnt == ((s->idx_n.a[i+1]-s->idx_n.a[i]))); + } + if(s->free_cnt[tid] >= FREE_BATCH) { + clear_count_buf(s, tid, 1); s->free_cnt[tid] = 0; + } + s->free_cnt[tid]++; + return; + } + + // if(i == 58) { + // fprintf(stderr, "\n-1-[M::%s] utg%.6u%c, rid::%ld, is_ovlp::%d, is_cnt::%d, len::%ld, str::%u\n", + // __func__, (uint32_t)i+1, "lc"[s->ug->u.a[i].circ], i, s->is_ovlp, s->is_cnt, len, (uint32_t)(!!seq)); + // } + + if(!gen_trans_adaptive_mmhap_aln(s, i, b, bl, seq, len, s->filter, s->diff_ec_ul, s->diff_ec_ul_double, s->bw_thres, s->bw_thres_double)) { + gen_trans_adaptive_mmhap_aln(s, i, b, bl, seq, len, NULL, s->diff_ec_ul_double, s->diff_ec_ul_double, s->bw_thres_double, s->bw_thres_double); + } + if(s->free_cnt[tid] >= FREE_BATCH) { + clear_count_buf(s, tid, 0); s->free_cnt[tid] = 0; + } + s->free_cnt[tid]++; +} + int64_t retrieve_cigar_err_dir(bit_extz_t *ez, int64_t s, int64_t e, int64_t *xk, int64_t *ck, int64_t is_back) { ///[ez->ts, ez->te]/[ez->qs, ez->qe]/[s, e) @@ -20178,6 +20374,145 @@ void gen_trans_base_count_comp(ug_trans_t *p, kv_u_trans_t *res) fprintf(stderr, "[M::%s::%.3f] ==> Qualification\n", __func__, 
yak_realtime()-index_time); } +void clean_trans_base_count_mmhap_comp_rmap(ug_trans_t *p, kv_u_trans_t *res) +{ + uint64_t i, k, l, occ, idx_n; ha_mzl_t *tz; u_trans_t *z; + kv_ul_ov_t *bl; double ww; ha_mzl_t *idx; + ///make results consistent + kv_resize(ha_mzl_t, p->srt_a, p->srt_a.n+p->ug->u.n); + idx = p->srt_a.a + p->srt_a.n; idx_n = p->ug->u.n; + for (i = 0; i < idx_n; i++) { + tz = &(idx[i]); + tz->x = (uint64_t)-1; tz->rev = 0; + tz->pos = tz->rid = tz->span = 0; + } + + for (i = 0, occ = res->n; (int64_t)i < p->n_thread; i++) { + bl = &(p->ll[i].tk); + if(!(bl->n)) continue; + for (k = 1, l = 0; k <= bl->n; k++) { + if(k == bl->n || bl->a[k].qn != bl->a[l].qn) { + if(k > l) { + tz = &(idx[bl->a[l].qn]); + tz->x = bl->a[l].qn; tz->x <<= 32; tz->x |= i; + tz->rid = l>>32; tz->pos = (uint32_t)l; tz->rev = 1; + occ += (k - l); + } + l = k; + } + } + } + + kv_resize(u_trans_t, *res, occ); + for (i = 0; i < idx_n; i++) { + tz = &(idx[i]); + if(!(tz->rev)) continue; + bl = &(p->ll[(uint32_t)(tz->x)].tk); + k = tz->rid; k <<= 32; k += tz->pos; + assert(bl->a[k].qn == (tz->x>>32)); + for (; (k < bl->n) && (bl->a[k].qn == (tz->x>>32)); k++) { + if(bl->a[k].qn == bl->a[k].tn) continue; + ww = cal_trans_ov_w(&(bl->a[k])); + if(ww <= 0) continue; + + kv_pushp(u_trans_t, *res, &z); + z->f = RC_3; z->rev = bl->a[k].rev; z->del = 0; + z->qn = bl->a[k].qn; z->qs = bl->a[k].qs; z->qe = bl->a[k].qe; + z->tn = bl->a[k].tn; z->ts = bl->a[k].ts; z->te = bl->a[k].te; + z->nw = ww; + } + } + destory_ug_rid_cov_t(p->ccov); free(p->ccov); + p->ccov = gen_ug_rid_cov_t(p->ug, p->rg, p->uopt->sources); + + clean_u_trans_t_idx_filter_mmhap_adv(res, p->ug, p->rg, p->uopt->sources, p->ccov); + gen_ug_rid_cov_t_by_ovlp(res, p->ccov); +} + +void gen_trans_base_count_mmhap_comp_rmap(ug_trans_t *p, kv_u_trans_t *res) +{ + double index_time = yak_realtime(); + uint64_t i, k, l, occ, m, cc; + p->ccov = gen_ug_rid_cov_t(p->ug, p->rg, p->uopt->sources); + clean_u_trans_t_idx_adv(res, p->ug, 
p->rg); p->filter = res; + + p->is_cnt = 1; p->is_ovlp = 0; + memset(p->free_cnt, 0, sizeof((*(p->free_cnt)))*p->n_thread); + kt_for(p->n_thread, worker_for_trans_ovlp_mmhap_adv, p, p->ug->u.n); + for (i = l = 0; i < p->ug->u.n; i++) { + occ = p->idx_n.a[i]; p->idx_n.a[i] = l; l += occ; + } + + p->idx_n.a[i] = l; + p->idx_a.n = p->idx_a.m = l; MALLOC(p->idx_a.a, p->idx_a.n); + p->is_cnt = 0; p->is_ovlp = 0; + memset(p->free_cnt, 0, sizeof((*(p->free_cnt)))*p->n_thread); + kt_for(p->n_thread, worker_for_trans_ovlp_mmhap_adv, p, p->ug->u.n); + p->srt_a.n = p->srt_a.m = p->idx_a.n; MALLOC(p->srt_a.a, p->srt_a.n); + + for (i = 0; i < p->srt_a.n; i++) { + p->srt_a.a[i] = p->idx_a.a[i]; + p->srt_a.a[i].pos = (uint32_t)i; + p->srt_a.a[i].rid = i>>32; + } + radix_sort_ha_mzl_t_srt(p->srt_a.a, p->srt_a.a + p->srt_a.n); + kvec_t(uint64_t) cut; kv_init(cut); + for (k = 1, l = 0; k <= p->srt_a.n; k++) { + if(k == p->srt_a.n || p->srt_a.a[l].x != p->srt_a.a[k].x) { + for (i = l; i < k; i++) { + m = p->srt_a.a[i].rid; m <<= 32; m |= p->srt_a.a[i].pos; + assert(p->srt_a.a[i].x == p->idx_a.a[m].x); + p->srt_a.a[i] = p->idx_a.a[m]; p->idx_a.a[m].x = i; + } + kv_push(uint64_t, cut, (k - l)); + l = k; + } + } + + if(cut.n > 0) { + radix_sort_gfa64(cut.a, cut.a + cut.n); + m = cut.n * 0.0002; cc = cut.a[cut.n-1] + 1; + if(m > 0 && m <= cut.n) cc = cut.a[cut.n-m] + 1; + if(cc < (uint64_t)p->mini_cut) p->mini_cut = cc; + } + kv_destroy(cut); + + p->is_cnt = 0; p->is_ovlp = 1; + memset(p->free_cnt, 0, sizeof((*(p->free_cnt)))*p->n_thread); + kt_for(p->n_thread, worker_for_trans_ovlp_mmhap_adv, p, p->ug->u.n); + + clean_trans_base_count_mmhap_comp_rmap(p, res); + + for (i = 0; (int64_t)i < p->n_thread; i++) p->ll[i].tk.n = 0; + + p->is_cnt = 0; p->is_ovlp = 1; + memset(p->free_cnt, 0, sizeof((*(p->free_cnt)))*p->n_thread); + kt_for(p->n_thread, worker_for_trans_chain_mmhap_adv, p, p->ug->u.n); + + + + + + for (i = 0; (int64_t)i < p->n_thread; i++) { + ha_ovec_destroy(p->hab[i]); + 
free(p->ll[i].lo.a); free(p->ll[i].srt.a.a); free(p->ll[i].tc.a); + } + free(p->idx_a.a); free(p->idx_n.a); free(p->hab); free(p->free_cnt); + destory_ug_rid_cov_t(p->ccov); free(p->ccov); + + + + + + + + + + + for (i = 0; (int64_t)i < p->n_thread; i++) free(p->ll[i].tk.a); + free(p->srt_a.a); free(p->ll); + fprintf(stderr, "[M::%s::%.3f] ==> Qualification\n", __func__, yak_realtime()-index_time); +} void gen_trans_base_count_mmhap_comp(ug_trans_t *p, kv_u_trans_t *res) { diff --git a/Additional_src/calculate_N50.py b/Additional_src/calculate_N50.py new file mode 100644 index 0000000..6e98261 --- /dev/null +++ b/Additional_src/calculate_N50.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# coding=utf-8 + +""" +Скрипт считает N50 последовательностей в формате FASTA. Выдаёт он только одно число - N50. + +Пример: +python3 calculate_N50.py assembly.fasta +""" + +import sys +import os +import re + +s_path_to_the_input_file = sys.argv[1] + +l_lengths_of_sequences = [] #список длин последовательностей. +n_sum_of_lengths_of_all_sequences = 0 #сумма длин всех последовательностей. + + +f_infile = open(s_path_to_the_input_file, "r") + +s_current_sequence = "" #последовательность, которую скрипт в данный момент рассматривает. + +for s_line in f_infile: + #если это заголовок последовательности, то скрипт записывает длину прошлой последовательности в список l_lengths_of_sequences и обнуляет значение переменной s_current_sequence + if re.search("^>", s_line): + if s_current_sequence != "": #если прошлой последовательности не было, то, скорее всего, это потому что я сейчас смотрю на первую последовательность в файле. 
Тогда прошлую последовательность не надо добавлять в список l_lengths_of_sequences + n_sequence_length = len(s_current_sequence) + l_lengths_of_sequences.append(n_sequence_length) + n_sum_of_lengths_of_all_sequences += n_sequence_length + s_current_sequence = "" + else: + s_current_sequence += re.sub(r"\s", r"", s_line) #убираю пробельные символы, включая символы переноса строки. + +#Добавляю длину последней последовательности +n_sequence_length = len(s_current_sequence) +l_lengths_of_sequences.append(n_sequence_length) +n_sum_of_lengths_of_all_sequences += n_sequence_length + +#сортирую массив длин последовательностей в обратном порядке. +l_lengths_of_sequences_sorted_backwards = sorted(l_lengths_of_sequences, reverse=True) + +#считаю N50. +n_current_sum_of_lengths = 0 +for n_sequence_length in l_lengths_of_sequences_sorted_backwards: + n_current_sum_of_lengths += n_sequence_length + if n_current_sum_of_lengths >= n_sum_of_lengths_of_all_sequences/2: + print(str(n_sequence_length)) + sys.exit() + + + + + + + + + + diff --git a/calculate_AG.py b/calculate_AG.py index 003d86f..1c97d86 100755 --- a/calculate_AG.py +++ b/calculate_AG.py @@ -19,6 +19,7 @@ import sys import os import re +import time import datetime import urllib.request import statistics @@ -88,7 +89,7 @@ s_number_of_busco_orthogroups_to_use = "1000" #сколько ортогрупп BUSCO использовать. Это строка, содержащая или число, или слово "all", если нужно использовать все. Если пользователь укажет больше, чем есть в используемой базе данных BUSCO, то calculate_AG всё равно будет использовать все. s_maximum_allowed_intron_length = "from_BUSCO" #максимальная разрешённая длина интрона. По умолчанию, используется значение из файла dataset.cfg датасета BUSCO. Переменная начинается с "s_", потому что это строка. Ниже будет ещё переменная n_maximum_allowed_intron_length, которая число. - s_version_of_calculate_AG = "2.19" #версия этой программы. Всегда равна версии Mabs. 
Поскольку эта программа нужна, в первую очередь, для Mabs, то когда я увеличиваю номер версии Mabs, то увеличивается и номер версии calculate_AG, и наоборот. + s_version_of_calculate_AG = "2.24" #версия этой программы. Всегда равна версии Mabs. Поскольку эта программа нужна, в первую очередь, для Mabs, то когда я увеличиваю номер версии Mabs, то увеличивается и номер версии calculate_AG, и наоборот. l_errors_in_command_line = [] #список ошибок в командной строке. Если пользователь совершил много ошибок, то calculate_AG напишет про них все, а не только про первую встреченную. @@ -208,7 +209,7 @@ s_path_to_a_local_busco_dataset = s_path_to_the_output_folder + "/" + s_busco_dataset_name_online #путь к месту, где будет лежать скачанный архивированный gzip файл с датасетом BUSCO. - #проверяю, доступен ли адрес http://mikeshelk.site/Data/BUSCO_datasets/Latest/. Он может быть недоступен из-за каких-то проблем с сервером. Если не доступен, то рекомендую пользователю скачать базу с http://busco-data.ezlab.org/v5/data/lineages/ и использовать опцию --local_busco_dataset. Проверку делаю примерно как написано на https://stackoverflow.com/questions/1949318/checking-if-a-website-is-up-via-python . А если доступен, то делаю ещё одну проверку — на то, есть ли нужный файл в папке http://mikeshelk.site/Data/BUSCO_datasets/Latest/ + #проверяю, доступен ли адрес http://mikeshelk.site/Data/BUSCO_datasets/Latest/. Он может быть недоступен из-за каких-то проблем с сервером. Если не доступен, то пробую ещё два раза с интервалом в 5 секунд. Если адрес так и не станет доступным, то рекомендую пользователю скачать базу с http://busco-data.ezlab.org/v5/data/lineages/ и использовать опцию --local_busco_dataset. Проверку делаю примерно как написано на https://stackoverflow.com/questions/1949318/checking-if-a-website-is-up-via-python . 
А если доступен, то делаю ещё одну проверку — на то, есть ли нужный файл в папке http://mikeshelk.site/Data/BUSCO_datasets/Latest/ try: s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() @@ -219,7 +220,28 @@ l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") except: - l_errors_in_command_line.append("http://mikeshelk.site/Data/BUSCO_datasets/Latest/ is not accessible. Please, download a BUSCO dataset from http://busco-data.ezlab.org/v5/data/lineages/ and use \"--local_busco_dataset\" instead of \"--download_busco_dataset\".") + time.sleep(5) + try: + s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() + #проверяю, доступен ли нужный файл, и если доступен, то качаю его. + try: + urllib.request.urlretrieve("http://mikeshelk.site/Data/BUSCO_datasets/Latest/" + s_busco_dataset_name_online, s_path_to_a_local_busco_dataset) + except: + l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") + + except: + time.sleep(5) + try: + s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() + #проверяю, доступен ли нужный файл, и если доступен, то качаю его. + try: + urllib.request.urlretrieve("http://mikeshelk.site/Data/BUSCO_datasets/Latest/" + s_busco_dataset_name_online, s_path_to_a_local_busco_dataset) + + except: + l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") + + except: + l_errors_in_command_line.append("http://mikeshelk.site/Data/BUSCO_datasets/Latest/ is not accessible. 
Please, download a BUSCO dataset from http://busco-data.ezlab.org/v5/data/lineages/ and use \"--local_busco_dataset\" instead of \"--download_busco_dataset\".") #если пользователь использовал --local_busco_dataset o_regular_expression_results = re.search(r" --local_busco_dataset (\S+)", s_command_line_reduced) @@ -332,13 +354,13 @@ ################################ #Со входными параметрами разобрался. Теперь, собственно, делаю работу. - f_logs = open(s_path_to_the_output_folder + "/logs.txt","w",buffering=1) + f_log = open(s_path_to_the_output_folder + "/log.txt","w",buffering=1) o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Started calculate_AG\n\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("Started calculate_AG\n\n") - f_logs.write("You have run calculate_AG of version " + s_version_of_calculate_AG + " with the following command: " + s_command_line + "\n\n") + f_log.write("You have run calculate_AG of version " + s_version_of_calculate_AG + " with the following command: " + s_command_line + "\n\n") #сделаю специальный файл, в который в конце будет записана только строка вроде "AG is 1023". f_AG_calculation_results = open(s_path_to_the_output_folder + "/AG.txt", "w") @@ -353,7 +375,7 @@ s_path_to_a_local_busco_dataset = s_path_to_the_output_folder + "/" + s_busco_dataset_name #Оставляю из базы BUSCO только нужное количество (s_number_of_busco_orthogroups_to_use) ортогрупп — тех, которые имеют наиболее консервативные последовательности. Если пользователь указал использовать все ортогруппы, то calculate_AG использует все. Если пользователь указал больше ортогрупп, чем есть в этом наборе BUSCO, то calculate_AG использует все и пишет Warning в основной файл с логами. 
- mabs_function_preprocess_busco_dataset.function_preprocess_busco_dataset(s_path_to_a_local_busco_dataset, s_number_of_busco_orthogroups_to_use, s_path_to_the_output_folder, f_logs) + mabs_function_preprocess_busco_dataset.function_preprocess_busco_dataset(s_path_to_a_local_busco_dataset, s_number_of_busco_orthogroups_to_use, s_path_to_the_output_folder, f_log) s_path_to_a_BUSCO_folder = s_path_to_the_output_folder + "/BUSCO_dataset_to_use/" @@ -366,7 +388,7 @@ #если файл с контигами пустой, то сразу останавливаю выполнение calculate_AG, считая что AG=0. Иначе Metaeuk выдаст ошибку (если я правильно помню). n_size_of_the_file_with_contigs = os.stat(s_path_to_the_assembly).st_size if n_size_of_the_file_with_contigs == 0: - f_logs.write("AG is 0") + f_log.write("AG is 0") f_AG_calculation_results.write("AG is 0") sys.exit() @@ -425,7 +447,7 @@ #если MetaEuk вообще не выдал результатов, то считаю, что AG=0. if not os.path.exists(s_path_to_the_output_folder + "/MetaEuk_results.fas"): - f_logs.write("AG is 0. Number of genes in single-copy orthogroups is 0. Number of genes in true multicopy orthogroups is 0. Number of genes in false multicopy orthogroups is 0.\n") + f_log.write("AG is 0. Number of genes in single-copy orthogroups is 0. Number of genes in true multicopy orthogroups is 0. Number of genes in false multicopy orthogroups is 0.\n") f_AG_calculation_results.write("AG is 0") sys.exit() @@ -436,7 +458,7 @@ s_path_to_the_file_with_BUSCO_scores_cutoff = s_path_to_a_BUSCO_folder + "/scores_cutoff" s_path_to_the_file_with_BUSCO_lengths_cutoff = s_path_to_a_BUSCO_folder + "/lengths_cutoff" - #f_logs = open("logs.txt", "w", buffering = 1) + #f_log = open("log.txt", "w", buffering = 1) #делаю словарь, в котором ключ это название ортогруппы, вроде 54443at71240, а значение это bit score cutoff, вроде 302.75. d_orthogroup_title_to_bit_score_cutoff = {} @@ -583,7 +605,7 @@ n_first_exon_coordinate += 1 #прибавляю единицу, потому что Metaeuk выдаёт координаты zero-based. 
#удаляю упоминание об этом экзоне из строки, чтобы можно было начать рассматривать новый. - #f_logs.write("analyzed exon " + o_regular_expression_results_2.group(0) + "\n") + #f_log.write("analyzed exon " + o_regular_expression_results_2.group(0) + "\n") s_exon_information_with_masked_metacharacters = re.escape(o_regular_expression_results_2.group(0)) #s_exon_information_with_masked_metacharacters это как o_regular_expression_results_2.group(0) , но все метасимволы замаскированы. Нужно, чтобы правильно прошло удаление этой подстроки из s_string_with_exons с помощью re.sub s_string_with_exons = re.sub(s_exon_information_with_masked_metacharacters, "", s_string_with_exons) @@ -714,77 +736,81 @@ s_orthogroup_title = l_line_split[3] n_bit_score = float(l_line_split[7]) - if s_orthogroup_title not in dl_orthogroup_title_to_the_list_of_targets_I_have_already_seen_in_this_file: - dl_orthogroup_title_to_the_list_of_targets_I_have_already_seen_in_this_file[s_orthogroup_title] = [] + #Иногда бывает такое, что ген, найденный MetaEuk для одной ортогруппы BUSCO, имеет также матчи к марковским профилями других ортогрупп. Благодаря следующей строке я учитываю только матчи к профилю той же ортогруппы, по белку которой этот ген был найден. 
+ + if re.search(r"^" + s_orthogroup_title, s_target_name): - if s_target_name not in dl_orthogroup_title_to_the_list_of_targets_I_have_already_seen_in_this_file[s_orthogroup_title]: - dl_orthogroup_title_to_the_list_of_targets_I_have_already_seen_in_this_file[s_orthogroup_title].append(s_target_name) + if s_orthogroup_title not in dl_orthogroup_title_to_the_list_of_targets_I_have_already_seen_in_this_file: + dl_orthogroup_title_to_the_list_of_targets_I_have_already_seen_in_this_file[s_orthogroup_title] = [] - if n_bit_score >= d_orthogroup_title_to_bit_score_cutoff[s_orthogroup_title]: - n_z_value = (n_target_length - d_orthogroup_title_to_the_average_BUSCO_protein_length[s_orthogroup_title]) / d_orthogroup_title_to_the_standard_deviation_of_BUSCO_protein_lengths[s_orthogroup_title] + if s_target_name not in dl_orthogroup_title_to_the_list_of_targets_I_have_already_seen_in_this_file[s_orthogroup_title]: + dl_orthogroup_title_to_the_list_of_targets_I_have_already_seen_in_this_file[s_orthogroup_title].append(s_target_name) - if n_z_value >= -2: - o_regular_expression_results = re.search(r"^(.*?)\|([^\|]+)\|(\+|\-)\|.*?\|.*?\|.*?\|(\d+)\|(\d+)\|(.+)", s_target_name) - - if o_regular_expression_results: - s_orthogroup_title = o_regular_expression_results.group(1) - s_contig_title = o_regular_expression_results.group(2) - s_chain = o_regular_expression_results.group(3) #цепь, на которой лежит ген. "+" или "-". - n_leftmost_coordinate_of_the_gene = int(o_regular_expression_results.group(4)) + 1 #прибавляю единицу, потому что Metaeuk выдаёт координаты zero-based. 
- n_rightmost_coordinate_of_the_gene = int(o_regular_expression_results.group(5)) - s_string_with_exons = o_regular_expression_results.group(6) - - s_gene_description = s_contig_title + ":" + str(n_leftmost_coordinate_of_the_gene) + "-" + str(n_rightmost_coordinate_of_the_gene) - - if s_orthogroup_title not in dl_orthogroup_title_to_the_list_of_its_genes: - dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title] = [] - dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title].append(s_gene_description) - - #f_logs.write("Started to analyze the coverage in exons of " + s_gene_description + "\n") - - if s_gene_description not in dl_gene_description_to_the_list_of_coverages_in_its_exons: - dl_gene_description_to_the_list_of_coverages_in_its_exons[s_gene_description] = [] - - #иду по всем координатам экзонов. Координаты экзона содержатся в подстроке вида 20872[20872]:20765[20765]:108[108] - while re.search(r"\d+\[(\d+)\]\:\d+\[(\d+)\]\:\d+\[\d+\]", s_string_with_exons): - o_regular_expression_results_2 = re.search(r"\d+\[(\d+)\]\:\d+\[(\d+)\]\:\d+\[\d+\]", s_string_with_exons) - n_first_exon_coordinate = int(o_regular_expression_results_2.group(1)) - n_second_exon_coordinate = int(o_regular_expression_results_2.group(2)) + if n_bit_score >= d_orthogroup_title_to_bit_score_cutoff[s_orthogroup_title]: + n_z_value = (n_target_length - d_orthogroup_title_to_the_average_BUSCO_protein_length[s_orthogroup_title]) / d_orthogroup_title_to_the_standard_deviation_of_BUSCO_protein_lengths[s_orthogroup_title] + + if n_z_value >= -2: + o_regular_expression_results = re.search(r"^(.*?)\|([^\|]+)\|(\+|\-)\|.*?\|.*?\|.*?\|(\d+)\|(\d+)\|(.+)", s_target_name) + + if o_regular_expression_results: + s_orthogroup_title = o_regular_expression_results.group(1) + s_contig_title = o_regular_expression_results.group(2) + s_chain = o_regular_expression_results.group(3) #цепь, на которой лежит ген. "+" или "-". 
+ n_leftmost_coordinate_of_the_gene = int(o_regular_expression_results.group(4)) + 1 #прибавляю единицу, потому что Metaeuk выдаёт координаты zero-based. + n_rightmost_coordinate_of_the_gene = int(o_regular_expression_results.group(5)) + s_string_with_exons = o_regular_expression_results.group(6) - #если ген обратно-комплементарный, то первой координатой была записана бОльшая. Делаю первой координатой меньшую. - if n_first_exon_coordinate > n_second_exon_coordinate: - n_temp = n_second_exon_coordinate - n_second_exon_coordinate = n_first_exon_coordinate - n_first_exon_coordinate = n_temp + s_gene_description = s_contig_title + ":" + str(n_leftmost_coordinate_of_the_gene) + "-" + str(n_rightmost_coordinate_of_the_gene) - n_first_exon_coordinate += 1 #прибавляю единицу, потому что Metaeuk выдаёт координаты zero-based. + if s_orthogroup_title not in dl_orthogroup_title_to_the_list_of_its_genes: + dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title] = [] + dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title].append(s_gene_description) - l_coverages_in_this_exon = [] #список покрытий в этом экзоне + #f_log.write("Started to analyze the coverage in exons of " + s_gene_description + "\n") - for n_position in range(n_first_exon_coordinate, n_second_exon_coordinate + 1): + if s_gene_description not in dl_gene_description_to_the_list_of_coverages_in_its_exons: + dl_gene_description_to_the_list_of_coverages_in_its_exons[s_gene_description] = [] + + #иду по всем координатам экзонов. 
Координаты экзона содержатся в подстроке вида 20872[20872]:20765[20765]:108[108] + while re.search(r"\d+\[(\d+)\]\:\d+\[(\d+)\]\:\d+\[\d+\]", s_string_with_exons): + o_regular_expression_results_2 = re.search(r"\d+\[(\d+)\]\:\d+\[(\d+)\]\:\d+\[\d+\]", s_string_with_exons) + n_first_exon_coordinate = int(o_regular_expression_results_2.group(1)) + n_second_exon_coordinate = int(o_regular_expression_results_2.group(2)) - #проверяю, нет ли такого, что в этом контиге не было вообще ни одной покрытой позиции. Тогда dd_contig_title_and_position_to_coverage будет неинициализирован для s_contig_title. В таком случае я считаю покрытие в этой позиции равным нулю. - if s_contig_title not in dd_contig_title_and_position_to_coverage: - n_coverage = 0 - else: - #поскольку в двойной словарь dd_contig_title_and_position_to_coverage я записывал покрытие только тех позиций, покрытие которых было ненулевым, то сейчас нужно проверить, есть ли эта позиция в этом двойном словаре. - if n_position in dd_contig_title_and_position_to_coverage[s_contig_title]: - n_coverage = dd_contig_title_and_position_to_coverage[s_contig_title][n_position] - else: + #если ген обратно-комплементарный, то первой координатой была записана бОльшая. Делаю первой координатой меньшую. + if n_first_exon_coordinate > n_second_exon_coordinate: + n_temp = n_second_exon_coordinate + n_second_exon_coordinate = n_first_exon_coordinate + n_first_exon_coordinate = n_temp + + n_first_exon_coordinate += 1 #прибавляю единицу, потому что Metaeuk выдаёт координаты zero-based. + + l_coverages_in_this_exon = [] #список покрытий в этом экзоне + + for n_position in range(n_first_exon_coordinate, n_second_exon_coordinate + 1): + + #проверяю, нет ли такого, что в этом контиге не было вообще ни одной покрытой позиции. Тогда dd_contig_title_and_position_to_coverage будет неинициализирован для s_contig_title. В таком случае я считаю покрытие в этой позиции равным нулю. 
+ if s_contig_title not in dd_contig_title_and_position_to_coverage: n_coverage = 0 + else: + #поскольку в двойной словарь dd_contig_title_and_position_to_coverage я записывал покрытие только тех позиций, покрытие которых было ненулевым, то сейчас нужно проверить, есть ли эта позиция в этом двойном словаре. + if n_position in dd_contig_title_and_position_to_coverage[s_contig_title]: + n_coverage = dd_contig_title_and_position_to_coverage[s_contig_title][n_position] + else: + n_coverage = 0 + + l_coverages_in_this_exon.append(n_coverage) - l_coverages_in_this_exon.append(n_coverage) - - #добавляю список покрытий этого экзона к словарю списков, который содержит списки покрытий для каждого гена - dl_gene_description_to_the_list_of_coverages_in_its_exons[s_gene_description] += l_coverages_in_this_exon - #удаляю упоминание об этом экзоне из строки, чтобы можно было начать рассматривать новый. - #f_logs.write("analyzed exon " + o_regular_expression_results_2.group(0) + "\n") - s_exon_information_with_masked_metacharacters = re.escape(o_regular_expression_results_2.group(0)) #s_exon_information_with_masked_metacharacters это как o_regular_expression_results_2.group(0) , но все метасимволы замаскированы. Нужно, чтобы правильно прошло удаление этой подстроки из s_string_with_exons с помощью re.sub - s_string_with_exons = re.sub(s_exon_information_with_masked_metacharacters, "", s_string_with_exons) - - #если ген присутствует, но фрагментирован, то я информацию о нём никак не использую. - else: - pass + #добавляю список покрытий этого экзона к словарю списков, который содержит списки покрытий для каждого гена + dl_gene_description_to_the_list_of_coverages_in_its_exons[s_gene_description] += l_coverages_in_this_exon + #удаляю упоминание об этом экзоне из строки, чтобы можно было начать рассматривать новый. 
+ #f_log.write("analyzed exon " + o_regular_expression_results_2.group(0) + "\n") + s_exon_information_with_masked_metacharacters = re.escape(o_regular_expression_results_2.group(0)) #s_exon_information_with_masked_metacharacters это как o_regular_expression_results_2.group(0) , но все метасимволы замаскированы. Нужно, чтобы правильно прошло удаление этой подстроки из s_string_with_exons с помощью re.sub + s_string_with_exons = re.sub(s_exon_information_with_masked_metacharacters, "", s_string_with_exons) + + #если ген присутствует, но фрагментирован, то я информацию о нём никак не использую. + else: + pass f_infile.close() @@ -814,7 +840,7 @@ l_coverages_in_exons_of_genes += dl_gene_description_to_the_list_of_coverages_in_its_exons[s_gene_description] - f_logs.write("For the gene " + s_gene_description + " from a single-copy orthogroup " + s_orthogroup_title + ", the median coverage is " + str(d_gene_description_to_the_median_coverage_in_its_exons[s_gene_description]) + ". It was calculated using " + str(len(dl_gene_description_to_the_list_of_coverages_in_its_exons[s_gene_description])) + " positions.\n") + f_log.write("For the gene " + s_gene_description + " from a single-copy orthogroup " + s_orthogroup_title + ", the median coverage is " + str(d_gene_description_to_the_median_coverage_in_its_exons[s_gene_description]) + ". It was calculated using " + str(len(dl_gene_description_to_the_list_of_coverages_in_its_exons[s_gene_description])) + " positions.\n") #если для ортогруппы найдено больше одного гена. if len(dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title]) > 1: @@ -823,16 +849,16 @@ l_coverages_in_exons_of_genes += dl_gene_description_to_the_list_of_coverages_in_its_exons[s_gene_description] - f_logs.write("For the gene " + s_gene_description + " from a multicopy orthogroup " + s_orthogroup_title + ", the median coverage is " + str(d_gene_description_to_the_median_coverage_in_its_exons[s_gene_description]) + ". 
It was calculated using " + str(len(dl_gene_description_to_the_list_of_coverages_in_its_exons[s_gene_description])) + " positions.\n") + f_log.write("For the gene " + s_gene_description + " from a multicopy orthogroup " + s_orthogroup_title + ", the median coverage is " + str(d_gene_description_to_the_median_coverage_in_its_exons[s_gene_description]) + ". It was calculated using " + str(len(dl_gene_description_to_the_list_of_coverages_in_its_exons[s_gene_description])) + " positions.\n") #если ни одного однокопийного гена найдено не было (крайне маловероятно, но, в принципе, такое может быть), но многокопийные гены были, то, для простоты, считаю медианным покрытием однокопийных генов медианное покрытие по всем многокопийным генам, делённое пополам. if len(l_coverages_in_exons_of_single_copy_genes) == 0: n_median_coverage_of_exons_of_single_copy_genes = statistics.median(l_coverages_in_exons_of_genes) / 2 - f_logs.write("\nWarning! No single-copy orthogroups were found. Hence, as the approximate coverage of genes in single-copy orthogroups I take half the median coverage by positions of genes from multicopy, which is " + str(n_median_coverage_of_exons_of_single_copy_genes) + "\n\n") + f_log.write("\nWarning! No single-copy orthogroups were found. Hence, as the approximate coverage of genes in single-copy orthogroups I take half the median coverage by positions of genes from multicopy, which is " + str(n_median_coverage_of_exons_of_single_copy_genes) + "\n\n") #если хотя бы один однокопийный ген был. else: n_median_coverage_of_exons_of_single_copy_genes = statistics.median(l_coverages_in_exons_of_single_copy_genes) - f_logs.write("\nThe median coverage in exons of genes from single-copy BUSCO orthogroups is " + str(n_median_coverage_of_exons_of_single_copy_genes) + ". 
It was calculated using " + str(len(l_coverages_in_exons_of_single_copy_genes)) + " positions.\n\n") + f_log.write("\nThe median coverage in exons of genes from single-copy BUSCO orthogroups is " + str(n_median_coverage_of_exons_of_single_copy_genes) + ". It was calculated using " + str(len(l_coverages_in_exons_of_single_copy_genes)) + " positions.\n\n") #теперь иду по всем ортогруппам и для каждой многокопийной ортогруппы считаю среднее покрытие в ней. Если оно < 0.75*(медианное покрытие в однокопийных генах), то считаю, что это ложно многокопийных ортогруппа. А если >=0.75*(медианное покрытие в однокопийных генах), то считаю, что истинно многокопийная. Параллельно, считаю количество генов в истинно многокопийных ортогруппах и ложно многокопийных ортогруппах. n_number_of_true_multicopy_genes = 0 @@ -846,33 +872,33 @@ n_mean_coverage_of_genes_in_this_orthogroup += d_gene_description_to_the_median_coverage_in_its_exons[s_gene_description] / len(dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title]) if n_mean_coverage_of_genes_in_this_orthogroup < 0.75 * n_median_coverage_of_exons_of_single_copy_genes: - f_logs.write("The mean coverage of genes from a multicopy BUSCO orthogroup " + s_orthogroup_title + " which contains " + str(len(dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title])) + " genes is " + str(round(n_mean_coverage_of_genes_in_this_orthogroup, 1)) + ". It is smaller than " + str(round(0.75 * n_median_coverage_of_exons_of_single_copy_genes, 1)) + ", hence this orthogroup is considered a false multicopy.\n") + f_log.write("The mean coverage of genes from a multicopy orthogroup " + s_orthogroup_title + " which contains " + str(len(dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title])) + " genes is " + str(round(n_mean_coverage_of_genes_in_this_orthogroup, 1)) + ". 
It is smaller than " + str(round(0.75 * n_median_coverage_of_exons_of_single_copy_genes, 1)) + ", hence this orthogroup is considered a false multicopy.\n") n_number_of_false_multicopy_genes += len(dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title]) #в принципе, это условие и следующее можно объединить в одно (">="). Но для удобства чтения логов я разделю случай ">" и случай "=". Впрочем, думаю, случай "=" будет встречаться крайне редко. elif n_mean_coverage_of_genes_in_this_orthogroup == 0.75 * n_median_coverage_of_exons_of_single_copy_genes: - f_logs.write("The mean coverage of genes from a multicopy BUSCO orthogroup " + s_orthogroup_title + " which contains " + str(len(dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title])) + " genes is " + str(round(n_mean_coverage_of_genes_in_this_orthogroup, 1)) + ". It is equal to 0.75 * (median_coverage_in_exons_of_single_copy_genes), hence this orthogroup is considered a true multicopy.\n") + f_log.write("The mean coverage of genes from a multicopy orthogroup " + s_orthogroup_title + " which contains " + str(len(dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title])) + " genes is " + str(round(n_mean_coverage_of_genes_in_this_orthogroup, 1)) + ". It is equal to 0.75 * (median_coverage_in_exons_of_single_copy_genes), hence this orthogroup is considered a true multicopy.\n") n_number_of_true_multicopy_genes += len(dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title]) elif n_mean_coverage_of_genes_in_this_orthogroup > 0.75 * n_median_coverage_of_exons_of_single_copy_genes: - f_logs.write("The mean coverage of genes from a multicopy BUSCO orthogroup " + s_orthogroup_title + " which contains " + str(len(dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title])) + " genes is " + str(round(n_mean_coverage_of_genes_in_this_orthogroup, 1)) + ". 
It is larger than " + str(round(0.75 * n_median_coverage_of_exons_of_single_copy_genes, 1)) + ", hence this orthogroup is considered a true multicopy.\n") + f_log.write("The mean coverage of genes from a multicopy orthogroup " + s_orthogroup_title + " which contains " + str(len(dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title])) + " genes is " + str(round(n_mean_coverage_of_genes_in_this_orthogroup, 1)) + ". It is larger than " + str(round(0.75 * n_median_coverage_of_exons_of_single_copy_genes, 1)) + ", hence this orthogroup is considered a true multicopy.\n") n_number_of_true_multicopy_genes += len(dl_orthogroup_title_to_the_list_of_its_genes[s_orthogroup_title]) n_AG = n_number_of_single_copy_genes_found_in_the_assembly + n_number_of_true_multicopy_genes else: - f_logs.write("AG is 0. Number of genes in single-copy orthogroups is 0. Number of genes in true multicopy orthogroups is 0. Number of genes in false multicopy orthogroups is 0.\n") + f_log.write("AG is 0. Number of genes in single-copy orthogroups is 0. Number of genes in true multicopy orthogroups is 0. Number of genes in false multicopy orthogroups is 0.\n") f_AG_calculation_results.write("AG is 0") sys.exit() - f_logs.write("AG is " + str(n_AG) + ". Number of genes in single-copy orthogroups is " + str(n_number_of_single_copy_genes_found_in_the_assembly) + ". Number of genes in true multicopy orthogroups is " + str(n_number_of_true_multicopy_genes) + ". Number of genes in false multicopy orthogroups is " + str(n_number_of_false_multicopy_genes) +".\n") + f_log.write("AG is " + str(n_AG) + ". Number of genes in single-copy orthogroups is " + str(n_number_of_single_copy_genes_found_in_the_assembly) + ". Number of genes in true multicopy orthogroups is " + str(n_number_of_true_multicopy_genes) + ". 
Number of genes in false multicopy orthogroups is " + str(n_number_of_false_multicopy_genes) +".\n") f_AG_calculation_results.write("AG is " + str(n_AG)) - f_logs.close + f_log.close() #Строю синаплот с покрытием генов. - os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/Additional/plot_gene_coverage_distribution.py " + s_path_to_the_output_folder + "/logs.txt 2.5 auto " + s_path_to_the_output_folder + "/gene_coverage_distribution") + os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/Additional/plot_gene_coverage_distribution.py " + s_path_to_the_output_folder + "/log.txt 2.5 auto " + s_path_to_the_output_folder + "/gene_coverage_distribution") diff --git a/install.sh b/install.sh index 85cad71..52ceda6 100755 --- a/install.sh +++ b/install.sh @@ -69,7 +69,6 @@ cd ./Additional/HMMER chmod 755 ./configure ./configure make -make check cd ../.. #MetaEuk is pre-compiled, I just change permissions. The pre-compiled version is for SSE4.1. Actually, there are MetaEuk versions for newer CPUs, but since MetaEuk is not a time-limiting step of Mabs, I don't provide them. @@ -96,14 +95,5 @@ cd ../.. #Proovframe is written in Perl, it does not require to be compiled. I just change permissions. The source code of Proovframe was slightly modified by me — mostly for Proovframe to be able to find DIAMOND provided with Mabs. chmod 755 ./Additional/Proovframe/bin/* -#Upgrading pip to the latest version. This is necessary because the installation of Plotnine (see below) may fail with very old versions of pip. -python3 -m pip install pip --user --upgrade --no-warn-script-location - -#Installing the Python module Pandas. -python3 -m pip install --user --no-warn-script-location Pandas - -#Installing the Python module Plotnine. Mabs may not work with old (approximately prior to 2019) versions of Plotnine, thus Plotnine is upgraded to the latest version. 
-python3 -m pip install --upgrade --user --no-warn-script-location Plotnine - #Making mabs-hifiasm.py, mabs-flye.py and calculate_AG.py executable chmod 755 mabs-hifiasm.py mabs-flye.py calculate_AG.py \ No newline at end of file diff --git a/mabs-flye.py b/mabs-flye.py index 96fa354..be6a7d9 100755 --- a/mabs-flye.py +++ b/mabs-flye.py @@ -19,12 +19,17 @@ import sys import os import re +import time import datetime import urllib.request #import ssl import math import shutil import subprocess +import gzip +import statistics +import scipy +import scipy.optimize from Additional import mabs_function_preprocess_busco_dataset @@ -92,7 +97,6 @@ if not os.path.isdir(s_path_to_the_folder_where_Mabs_lies + "/Test_datasets"): l_unavailable_files_and_folders.append("The subfolder \"Test_datasets\" should be in the folder where Mabs lies.") - #делаю парсинг аргументов командной строки. Можно было бы использовать argparse, но когда я делаю это без библиотек, то больше возможностей для того, чтобы сделать интерфейс таким, какой мне нравится. s_command_line = " ".join(sys.argv) #команда, которой запущен Mabs-flye, в одну строку. @@ -109,10 +113,11 @@ s_genome_size_estimate = "auto" #оценка размера генома. s_number_of_busco_orthogroups_to_use = "1000" #сколько ортогрупп BUSCO использовать. Это строка, содержащая или число, или слово "all", если нужно использовать все. Если пользователь укажет больше, чем есть в используемой базе данных BUSCO, то Mabs-flye всё равно будет использовать все. + n_maximum_number_of_points_to_try = 10 #максимальное количество точек, которые Mabs-flye должен пробовать в процессе оптимизации методом Нелдера-Мида. s_maximum_allowed_intron_length = "from_BUSCO" #максимальная разрешённая длина интрона. По умолчанию, используется значение из файла dataset.cfg датасета BUSCO. s_additional_flye_parameters = "" #дополнительные параметры Flye. - s_Mabs_version = "2.19" + s_Mabs_version = "2.24" l_errors_in_command_line = [] #список ошибок в командной строке. 
Если пользователь совершил много ошибок, то Mabs-flye напишет про них все, а не только про первую встреченную. @@ -131,10 +136,11 @@ 5) --threads Number of CPU threads to be used by Mabs-flye. The default value is 10. 6) --output_folder Output folder for Mabs-flye results. The default is "Mabs_results". 7) --number_of_busco_orthogroups How many BUSCO orthogroups should Mabs-flye use. Should be either a positive integral value or "all" to use all orthogroups. The default value is 1000. -8) --genome_size Haploid genome size. Should be either "auto" for automatic estimation, or a number ending with "k", "m" or "g". For example, 1.5g means 1.5 gigabases. The default value is "auto". -9) --max_intron_length Maximum allowed length of an intron. Should be either "from_BUSCO" to use a value from a BUSCO dataset, or a number, possibly ending with "k", "m" or "g". For example, 20k means 20 kilobases. The default is "from_BUSCO". Change --max_intron_length if you assemble a genome with unusually long introns. -10) --local_busco_dataset Path to a local BUSCO dataset, manually pre-downloaded from http://mikeshelk.site/Data/BUSCO_datasets/Latest/ or http://busco-data.ezlab.org/v5/data/lineages/. Example: "--local_busco_dataset /home/test/Data/primates_odb10.2021-02-19.tar.gz". May be a .tar.gz file or a decompressed folder. This option is mutually exclusive with "--download_busco_dataset". -11) --additional_flye_parameters A string with additional parameters to be passed to Flye, enclosed in square brackets. Example: "--additional_flye_parameters [--scaffold --min-overlap 20000]". +8) --maximum_number_of_points_to_try The maximum number of combinations of Flye parameters to be tried by Mabs-flye using the Nelder-Mead algorithm. The default value is 10. Increasing the value of this parameter will increase the computation time but may increase the accuracy of the results. +9) --genome_size Haploid genome size. 
Should be either "auto" for automatic estimation, or a number ending with "k", "m" or "g". For example, 1.5g means 1.5 gigabases. The default value is "auto". +10) --max_intron_length Maximum allowed length of an intron. Should be either "from_BUSCO" to use a value from a BUSCO dataset, or a number, possibly ending with "k", "m" or "g". For example, 20k means 20 kilobases. The default is "from_BUSCO". Change --max_intron_length if you assemble a genome with unusually long introns. +11) --local_busco_dataset Path to a local BUSCO dataset, manually pre-downloaded from http://mikeshelk.site/Data/BUSCO_datasets/Latest/ or http://busco-data.ezlab.org/v5/data/lineages/. Example: "--local_busco_dataset /home/test/Data/primates_odb10.2021-02-19.tar.gz". May be a .tar.gz file or a decompressed folder. This option is mutually exclusive with "--download_busco_dataset". +12) --additional_flye_parameters A string with additional parameters to be passed to Flye, enclosed in square brackets. Example: "--additional_flye_parameters [--scaffold --min-overlap 20000]". Informational options: 12) --help Print this help. @@ -186,7 +192,7 @@ s_path_to_a_local_busco_dataset = s_path_to_the_output_folder + "/" + s_busco_dataset_name_online #путь к месту, где будет лежать скачанный архивированный gzip файл с датасетом BUSCO. - #проверяю, доступен ли адрес http://mikeshelk.site/Data/BUSCO_datasets/Latest/. Он может быть недоступен из-за каких-то проблем с сервером. Если не доступен, то рекомендую пользователю скачать базу с http://busco-data.ezlab.org/v5/data/lineages/ и использовать опцию --local_busco_dataset. Проверку делаю примерно как написано на https://stackoverflow.com/questions/1949318/checking-if-a-website-is-up-via-python . А если доступен, то делаю ещё одну проверку — на то, есть ли нужный файл в папке http://mikeshelk.site/Data/BUSCO_datasets/Latest/ + #проверяю, доступен ли адрес http://mikeshelk.site/Data/BUSCO_datasets/Latest/. 
Он может быть недоступен из-за каких-то проблем с сервером. Если не доступен, то пробую ещё два раза с интервалом в 5 секунд. Если адрес так и не станет доступным, то рекомендую пользователю скачать базу с http://busco-data.ezlab.org/v5/data/lineages/ и использовать опцию --local_busco_dataset. Проверку делаю примерно как написано на https://stackoverflow.com/questions/1949318/checking-if-a-website-is-up-via-python . А если доступен, то делаю ещё одну проверку — на то, есть ли нужный файл в папке http://mikeshelk.site/Data/BUSCO_datasets/Latest/ try: s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() @@ -197,7 +203,28 @@ l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") except: - l_errors_in_command_line.append("Unfortunately, http://mikeshelk.site/Data/BUSCO_datasets/Latest/ is currently not accessible. To test Mabs-flye, download the file http://busco-data.ezlab.org/v5/data/lineages/saccharomycetes_odb10.2020-08-05.tar.gz and run the following command:\nmabs-flye.py --nanopore_reads [PATH TO THE FOLDER WITH MABS]/Test_datasets/nanopore_test_reads.fastq.gz --pacbio_clr_reads [PATH TO THE FOLDER WITH MABS]/Test_datasets/pacbio_clr_test_reads.fastq.gz --local_busco_dataset saccharomycetes_odb10.2020-08-05.tar.gz") + time.sleep(5) + try: + s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() + #проверяю, доступен ли нужный файл, и если доступен, то качаю его. 
+ try: + urllib.request.urlretrieve("http://mikeshelk.site/Data/BUSCO_datasets/Latest/" + s_busco_dataset_name_online, s_path_to_a_local_busco_dataset) + except: + l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") + + except: + time.sleep(5) + try: + s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() + #проверяю, доступен ли нужный файл, и если доступен, то качаю его. + try: + urllib.request.urlretrieve("http://mikeshelk.site/Data/BUSCO_datasets/Latest/" + s_busco_dataset_name_online, s_path_to_a_local_busco_dataset) + + except: + l_errors_in_command_line.append("http://mikeshelk.site/Data/BUSCO_datasets/Latest/ is not accessible. Please, download a BUSCO dataset from http://busco-data.ezlab.org/v5/data/lineages/ and use \"--local_busco_dataset\" instead of \"--download_busco_dataset\".") + + except: + l_errors_in_command_line.append("Unfortunately, http://mikeshelk.site/Data/BUSCO_datasets/Latest/ is currently not accessible. To test Mabs-flye, download the file http://busco-data.ezlab.org/v5/data/lineages/saccharomycetes_odb10.2020-08-05.tar.gz and run the following command:\nmabs-flye.py --nanopore_reads [PATH TO THE FOLDER WITH MABS]/Test_datasets/nanopore_test_reads.fastq.gz --pacbio_clr_reads [PATH TO THE FOLDER WITH MABS]/Test_datasets/pacbio_clr_test_reads.fastq.gz --local_busco_dataset saccharomycetes_odb10.2020-08-05.tar.gz") if len(l_errors_in_command_line) != 0: #Если ошибка была всего одна. 
@@ -235,29 +262,39 @@ s_string_to_remove = re.escape(o_regular_expression_results.group(0)) s_command_line_reduced = re.sub(s_string_to_remove, "", s_command_line_reduced, 1) - #проверяю, что пользователь не дал опцией --additional_flye_parameters следующие опции: --nano-raw, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative . Это потому, что Mabs-flye их и так использует. + #проверяю, что пользователь не дал опцией --additional_flye_parameters следующие опции: --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative . Это потому, что Mabs-flye их и так использует. if re.search(r"\-\-nano\-raw ", s_additional_flye_parameters): - l_errors_in_command_line.append("You have given Mabs-flye the option \"--nano-raw\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + l_errors_in_command_line.append("You have given Mabs-flye the option \"--nano-raw\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + if re.search(r"\-\-nano\-corr ", s_additional_flye_parameters): + l_errors_in_command_line.append("You have given Mabs-flye the option \"--nano-corr\" via the option \"--additional_flye_parameters\". 
The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + if re.search(r"\-\-nano\-hq ", s_additional_flye_parameters): + l_errors_in_command_line.append("You have given Mabs-flye the option \"--nano-hq\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + if re.search(r"\-\-pacbio\-hifi ", s_additional_flye_parameters): + l_errors_in_command_line.append("You have given Mabs-flye the option \"--pacbio-hifi\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + if re.search(r"\-\-pacbio\-raw ", s_additional_flye_parameters): + l_errors_in_command_line.append("You have given Mabs-flye the option \"--pacbio-raw\" via the option \"--additional_flye_parameters\". 
The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + if re.search(r"\-\-pacbio\-corr ", s_additional_flye_parameters): + l_errors_in_command_line.append("You have given Mabs-flye the option \"--pacbio-corr\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") if re.search(r"\-\-out\-dir ", s_additional_flye_parameters): - l_errors_in_command_line.append("You have given Mabs-flye the option \"--out-dir\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + l_errors_in_command_line.append("You have given Mabs-flye the option \"--out-dir\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") if re.search(r"\-\-threads ", s_additional_flye_parameters): - l_errors_in_command_line.append("You have given Mabs-flye the option \"--threads\" via the option \"--additional_flye_parameters\". 
The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + l_errors_in_command_line.append("You have given Mabs-flye the option \"--threads\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") if re.search(r"\-\-no\-alt\-contigs ", s_additional_flye_parameters): - l_errors_in_command_line.append("You have given Mabs-flye the option \"--no-alt-contigs\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + l_errors_in_command_line.append("You have given Mabs-flye the option \"--no-alt-contigs\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") if re.search(r"\-\-nano\-raw ", s_additional_flye_parameters): - l_errors_in_command_line.append("You have given Mabs-flye the option \"--genome-size\" via the option \"--additional_flye_parameters\". 
The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + l_errors_in_command_line.append("You have given Mabs-flye the option \"--genome-size\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") if re.search(r"\-o ", s_additional_flye_parameters): - l_errors_in_command_line.append("You have given Mabs-flye the option \"-o\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + l_errors_in_command_line.append("You have given Mabs-flye the option \"-o\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") if re.search(r"\-t ", s_additional_flye_parameters): - l_errors_in_command_line.append("You have given Mabs-flye the option \"-t\" via the option \"--additional_flye_parameters\". 
The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + l_errors_in_command_line.append("You have given Mabs-flye the option \"-t\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") if re.search(r"\-g ", s_additional_flye_parameters): - l_errors_in_command_line.append("You have given Mabs-flye the option \"-g\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + l_errors_in_command_line.append("You have given Mabs-flye the option \"-g\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") if re.search(r"assemble_ovlp_divergence ", s_additional_flye_parameters): - l_errors_in_command_line.append("You have given Mabs-flye the option \"assemble_ovlp_divergence\" via the option \"--additional_flye_parameters\". 
The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + l_errors_in_command_line.append("You have given Mabs-flye the option \"assemble_ovlp_divergence\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") if re.search(r"repeat_graph_ovlp_divergence ", s_additional_flye_parameters): - l_errors_in_command_line.append("You have given Mabs-flye the option \"repeat_graph_ovlp_divergence\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + l_errors_in_command_line.append("You have given Mabs-flye the option \"repeat_graph_ovlp_divergence\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") if re.search(r"assemble_divergence_relative ", s_additional_flye_parameters): - l_errors_in_command_line.append("You have given Mabs-flye the option \"assemble_divergence_relative\" via the option \"--additional_flye_parameters\". 
The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") + l_errors_in_command_line.append("You have given Mabs-flye the option \"assemble_divergence_relative\" via the option \"--additional_flye_parameters\". The following options cannot be passed via \"--additional_flye_parameters\": --nano-raw, --nano-corr, --nano-hq, --pacbio-hifi, --pacbio-raw, --pacbio-corr, --out-dir, --threads, --no-alt-contigs, --genome-size, -o, -t, -g, assemble_ovlp_divergence, repeat_graph_ovlp_divergence, assemble_divergence_relative.") #смотрю, дал ли пользователь риды Нанопора o_regular_expression_results = re.search(r" --nanopore_reads (\S+)", s_command_line_reduced) @@ -330,7 +367,7 @@ s_path_to_a_local_busco_dataset = s_path_to_the_output_folder + "/" + s_busco_dataset_name_online #путь к месту, где будет лежать скачанный архивированный gzip файл с датасетом BUSCO. - #проверяю, доступен ли адрес http://mikeshelk.site/Data/BUSCO_datasets/Latest/. Он может быть недоступен из-за каких-то проблем с сервером. Если не доступен, то рекомендую пользователю скачать базу с http://busco-data.ezlab.org/v5/data/lineages/ и использовать опцию --local_busco_dataset. Проверку делаю примерно как написано на https://stackoverflow.com/questions/1949318/checking-if-a-website-is-up-via-python . А если доступен, то делаю ещё одну проверку — на то, есть ли нужный файл в папке http://mikeshelk.site/Data/BUSCO_datasets/Latest/ + #проверяю, доступен ли адрес http://mikeshelk.site/Data/BUSCO_datasets/Latest/. Если не доступен, то пробую ещё два раза с интервалом в 5 секунд. Если адрес так и не станет доступным, то рекомендую пользователю скачать базу с http://busco-data.ezlab.org/v5/data/lineages/ и использовать опцию --local_busco_dataset. 
Проверку делаю примерно как написано на https://stackoverflow.com/questions/1949318/checking-if-a-website-is-up-via-python . А если доступен, то делаю ещё одну проверку — на то, есть ли нужный файл в папке http://mikeshelk.site/Data/BUSCO_datasets/Latest/ try: s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() @@ -341,7 +378,27 @@ l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") except: - l_errors_in_command_line.append("http://mikeshelk.site/Data/BUSCO_datasets/Latest/ is not accessible. Please, download a BUSCO dataset from http://busco-data.ezlab.org/v5/data/lineages/ and use \"--local_busco_dataset\" instead of \"--download_busco_dataset\".") + time.sleep(5) + try: + s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() + #проверяю, доступен ли нужный файл, и если доступен, то качаю его. + try: + urllib.request.urlretrieve("http://mikeshelk.site/Data/BUSCO_datasets/Latest/" + s_busco_dataset_name_online, s_path_to_a_local_busco_dataset) + except: + l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") + + except: + time.sleep(5) + try: + s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() + #проверяю, доступен ли нужный файл, и если доступен, то качаю его. + try: + urllib.request.urlretrieve("http://mikeshelk.site/Data/BUSCO_datasets/Latest/" + s_busco_dataset_name_online, s_path_to_a_local_busco_dataset) + except: + l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") + + except: + l_errors_in_command_line.append("http://mikeshelk.site/Data/BUSCO_datasets/Latest/ is not accessible. 
Please, download a BUSCO dataset from http://busco-data.ezlab.org/v5/data/lineages/ and use \"--local_busco_dataset\" instead of \"--download_busco_dataset\".") #если пользователь использовал --local_busco_dataset o_regular_expression_results = re.search(r" --local_busco_dataset (\S+)", s_command_line_reduced) @@ -374,6 +431,14 @@ s_string_to_remove = re.escape(o_regular_expression_results.group(0)) s_command_line_reduced = re.sub(s_string_to_remove, "", s_command_line_reduced, 1) + #смотрю, указал ли пользователь в командной строке максимальное количество точек, которые нужно попробовать с помощью метода Нелдера-Мида. + o_regular_expression_results = re.search(r" --maximum_number_of_points_to_try ([\d\.\+]+)", s_command_line_reduced) + if o_regular_expression_results: + n_maximum_number_of_points_to_try = int(o_regular_expression_results.group(1)) + + s_string_to_remove = re.escape(o_regular_expression_results.group(0)) + s_command_line_reduced = re.sub(s_string_to_remove, "", s_command_line_reduced, 1) + #смотрю, указал ли пользователь в командной строке размер генома. Разрешается три варианта формата: число, число с [kmgKMG] на конце, "auto". o_regular_expression_results = re.search(r" --genome_size ([\d\.eE\-\+]+[kmgKMG]?|auto)", s_command_line_reduced) if o_regular_expression_results: @@ -454,19 +519,24 @@ sys.exit() - f_logs = open(s_path_to_the_output_folder + "/mabs_logs.txt","w",buffering=1) #f_logs это общий файл с логами Mabs-flye, в отличие от трёх дополнительных файлов с логами, которые ведут три отдельных экземпляра Mabs-flye. buffering=1 означает, что буферизация идёт только на уровне строк. + f_log = open(s_path_to_the_output_folder + "/mabs_log.txt","w",buffering=1) #f_log это общий файл с логами Mabs-flye, в отличие от трёх дополнительных файлов с логами, которые ведут три отдельных экземпляра Mabs-flye. buffering=1 означает, что буферизация идёт только на уровне строк. 
o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Started Mabs-flye\n\n") - - f_logs.write("You have run Mabs-flye of version " + s_Mabs_version + " with the following command: " + s_command_line + "\n\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("Started Mabs-flye\n\n") + f_log.write("You have run Mabs-flye of version " + s_Mabs_version + " with the following command: " + s_command_line + "\n\n") + + #Если пользователь дал больше 128 потоков, то ограничиваю количество потоков 128-ю, потому что Flye при попытке использовать больше 128 потоков вылетает с ошибкой, говоря, что не может использовать больше 128. + if n_number_of_cpu_threads_to_use > 128: + f_log.write("Warning: you indicated Mabs-flye to use " + str(n_number_of_cpu_threads_to_use) + " CPU threads. However, Flye cannot use more than 128 threads. Hence, the number of used threads will be limited to 128\n\n") + n_number_of_cpu_threads_to_use = 128 + #если пользователь делает сборку тестового набора ридов Mabs-flye, то нужно написать подробности этого тестового набора.
if (len(sys.argv) == 2) and re.search(r"\s\-\-run_test", s_command_line): - f_logs.write("As a test, Mabs-flye will assemble the first chromosome of Saccharomyces cerevisiae, which is approximately 200 kbp long, using 20x Nanopore reads and 10x PacBio CLR reads.\n\n") - f_logs.write("The command \"mabs-flye.py --run_test\" is equivalent to the command \"mabs-flye.py --nanopore_reads " + s_path_to_the_folder_where_Mabs_lies + "/Test_datasets/nanopore_test_reads.fastq.gz --pacbio_clr_reads " + s_path_to_the_folder_where_Mabs_lies + "/Test_datasets/pacbio_clr_test_reads.fastq.gz --download_busco_dataset saccharomycetes_odb10.2020-08-05.tar.gz\"\n") - f_logs.write("If after Mabs-flye finishes you see a file ./Mabs_results/The_best_assembly/assembly.fasta which has a size of approximately 200 kilobytes, then the test succeeded.\n\n") + f_log.write("As a test, Mabs-flye will assemble the first chromosome of Saccharomyces cerevisiae, which is approximately 200 kbp long, using 20x Nanopore reads and 10x PacBio CLR reads.\n\n") + f_log.write("The command \"mabs-flye.py --run_test\" is equivalent to the command \"mabs-flye.py --nanopore_reads " + s_path_to_the_folder_where_Mabs_lies + "/Test_datasets/nanopore_test_reads.fastq.gz --pacbio_clr_reads " + s_path_to_the_folder_where_Mabs_lies + "/Test_datasets/pacbio_clr_test_reads.fastq.gz --download_busco_dataset saccharomycetes_odb10.2020-08-05.tar.gz\"\n") + f_log.write("If after Mabs-flye finishes you see a file ./Mabs_results/The_best_assembly/assembly.fasta which has a size of approximately 200 kilobytes, then the test succeeded.\n\n") #если пользователь сказал скачать файл с базой BUSCO или сам дал файл (но не папку), то разархивирую файл и меняю значение переменной s_path_to_a_local_busco_dataset с пути к файлу на путь к папке. 
if os.path.isfile(s_path_to_a_local_busco_dataset): @@ -479,7 +549,7 @@ #Оставляю из базы BUSCO только нужное количество (s_number_of_busco_orthogroups_to_use) ортогрупп — тех, которые имеют наиболее консервативные последовательности. Если пользователь указал использовать все ортогруппы, то Mabs-flye использует все. Если пользователь указал больше ортогрупп, чем есть в этом наборе BUSCO, то Mabs-flye использует все и пишет Warning в основной файл с логами. - mabs_function_preprocess_busco_dataset.function_preprocess_busco_dataset(s_path_to_a_local_busco_dataset, s_number_of_busco_orthogroups_to_use, s_path_to_the_output_folder, f_logs) + mabs_function_preprocess_busco_dataset.function_preprocess_busco_dataset(s_path_to_a_local_busco_dataset, s_number_of_busco_orthogroups_to_use, s_path_to_the_output_folder, f_log) #делаю ссылку на файл "ancestral", давая ему расширение .fasta. Затем делаю базу данных DIAMOND. #с помощью os.path.abspath() я получают абсолютный путь. Если он относительный, то это может создать проблемы в работоспособности мягкой ссылки. @@ -508,6 +578,11 @@ else: s_output_extension = "fasta" + #Проверяю, что DIAMOND выдал файл. Файла может не быть, если у DIAMOND были какие-то проблемы при запуске (см. https://github.com/shelkmike/Mabs/issues/3) + if not os.path.exists(s_path_to_the_output_folder + "/diamond_results_for_alignment_of_nanopore_reads_to_busco_proteins.txt"): + print("Mabs-flye has stopped because there was an error during DIAMOND execution.") + sys.exit() + os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/Additional/get_single_end_reads_from_DIAMOND_results.py " + s_path_to_nanopore_reads + " " + s_path_to_the_output_folder + "/diamond_results_for_alignment_of_nanopore_reads_to_busco_proteins.txt " + s_path_to_the_output_folder + "/nanopore_reads_that_have_matches_to_busco_proteins." 
+ s_output_extension) s_path_to_nanopore_reads_that_correspond_to_busco_genes = s_path_to_the_output_folder + "/nanopore_reads_that_have_matches_to_busco_proteins." + s_output_extension @@ -523,6 +598,11 @@ else: s_output_extension = "fasta" + #Проверяю, что DIAMOND выдал файл. Файла может не быть, если у DIAMOND были какие-то проблемы при запуске (см. https://github.com/shelkmike/Mabs/issues/3) + if not os.path.exists(s_path_to_the_output_folder + "/diamond_results_for_alignment_of_pacbio_hifi_reads_to_busco_proteins.txt"): + print("Mabs-flye has stopped because there was an error during DIAMOND execution.") + sys.exit() + os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/Additional/get_single_end_reads_from_DIAMOND_results.py " + s_path_to_pacbio_hifi_reads + " " + s_path_to_the_output_folder + "/diamond_results_for_alignment_of_pacbio_hifi_reads_to_busco_proteins.txt " + s_path_to_the_output_folder + "/pacbio_hifi_reads_that_have_matches_to_busco_proteins." + s_output_extension) s_path_to_pacbio_hifi_reads_that_correspond_to_busco_genes = s_path_to_the_output_folder + "/pacbio_hifi_reads_that_have_matches_to_busco_proteins." + s_output_extension @@ -538,6 +618,11 @@ else: s_output_extension = "fasta" + #Проверяю, что DIAMOND выдал файл. Файла может не быть, если у DIAMOND были какие-то проблемы при запуске (см. https://github.com/shelkmike/Mabs/issues/3) + if not os.path.exists(s_path_to_the_output_folder + "/diamond_results_for_alignment_of_pacbio_clr_reads_to_busco_proteins.txt"): + print("Mabs-flye has stopped because there was an error during DIAMOND execution.") + sys.exit() + os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/Additional/get_single_end_reads_from_DIAMOND_results.py " + s_path_to_pacbio_clr_reads + " " + s_path_to_the_output_folder + "/diamond_results_for_alignment_of_pacbio_clr_reads_to_busco_proteins.txt " + s_path_to_the_output_folder + "/pacbio_clr_reads_that_have_matches_to_busco_proteins." 
+ s_output_extension) s_path_to_pacbio_clr_reads_that_correspond_to_busco_genes = s_path_to_the_output_folder + "/pacbio_clr_reads_that_have_matches_to_busco_proteins." + s_output_extension @@ -617,7 +702,7 @@ if (s_path_to_nanopore_reads != "") and (s_path_to_pacbio_hifi_reads != "") and (s_path_to_pacbio_clr_reads != ""): #если хотя бы один из наборов ридов был в формате FASTA. - if (not re.search(r"(\.fastq|\.fq|\.fastq\.gz|\.fq\.gz)$", s_path_to_pacbio_hifi_reads, flags = re.IGNORECASE)) or (not re.search(r"(\.fastq|\.fq|\.fastq\.gz|\.fq\.gz)$", s_path_to_pacbio_clr_reads, flags = re.IGNORECASE)): + if (not re.search(r"(\.fastq|\.fq|\.fastq\.gz|\.fq\.gz)$", s_path_to_nanopore_reads, flags = re.IGNORECASE)) or (not re.search(r"(\.fastq|\.fq|\.fastq\.gz|\.fq\.gz)$", s_path_to_pacbio_hifi_reads, flags = re.IGNORECASE)) or (not re.search(r"(\.fastq|\.fq|\.fastq\.gz|\.fq\.gz)$", s_path_to_pacbio_clr_reads, flags = re.IGNORECASE)): subprocess.call(["bash", "-c", "cat <(" + s_path_to_the_folder_where_Mabs_lies + "/Additional/SeqTk/seqtk seq -a " + s_path_to_nanopore_reads + ") <(" + s_path_to_the_folder_where_Mabs_lies + "/Additional/SeqTk/seqtk seq -a " + s_path_to_pacbio_hifi_reads + ") <(" + s_path_to_the_folder_where_Mabs_lies + "/Additional/SeqTk/seqtk seq -a " + s_path_to_pacbio_clr_reads + ") | gzip -1 > " + s_path_to_the_output_folder + "/all_long_reads.fasta.gz"]) s_path_to_the_file_with_all_long_reads = s_path_to_the_output_folder + "/all_long_reads.fasta.gz" #если все наборы ридов были в формате FASTQ. @@ -643,7 +728,7 @@ #если пользователь дал риды Нанопора и PacBio CLR. if (s_path_to_nanopore_reads != "") and (s_path_to_pacbio_hifi_reads == "") and (s_path_to_pacbio_clr_reads != ""): #если хотя бы один из наборов ридов был в формате FASTA. 
- if (not re.search(r"(\.fastq|\.fq|\.fastq\.gz|\.fq\.gz)$", s_path_to_pacbio_hifi_reads, flags = re.IGNORECASE)) or (not re.search(r"(\.fastq|\.fq|\.fastq\.gz|\.fq\.gz)$", s_path_to_pacbio_clr_reads, flags = re.IGNORECASE)): + if (not re.search(r"(\.fastq|\.fq|\.fastq\.gz|\.fq\.gz)$", s_path_to_nanopore_reads, flags = re.IGNORECASE)) or (not re.search(r"(\.fastq|\.fq|\.fastq\.gz|\.fq\.gz)$", s_path_to_pacbio_clr_reads, flags = re.IGNORECASE)): subprocess.call(["bash", "-c", "cat <(" + s_path_to_the_folder_where_Mabs_lies + "/Additional/SeqTk/seqtk seq -a " + s_path_to_nanopore_reads + ") <(" + s_path_to_the_folder_where_Mabs_lies + "/Additional/SeqTk/seqtk seq -a " + s_path_to_pacbio_clr_reads + ") | gzip -1 > " + s_path_to_the_output_folder + "/all_long_reads.fasta.gz"]) s_path_to_the_file_with_all_long_reads = s_path_to_the_output_folder + "/all_long_reads.fasta.gz" #если все наборы ридов были в формате FASTQ. @@ -678,241 +763,205 @@ if (s_path_to_nanopore_reads == "") and (s_path_to_pacbio_hifi_reads == "") and (s_path_to_pacbio_clr_reads != ""): s_path_to_the_file_with_all_long_reads = s_path_to_pacbio_clr_reads s_long_reads_option_for_calculate_AG = "--pacbio_clr_reads" - - - #Теперь, собственно, начинаю проверку 10 точек методом золотого сечения. n_point_1 это самая левая в данный момент точка (то есть, с наименьшим log10(max_divergence)), n_point_4 это самая правая (то есть, с наибольшим log10(max_divergence)), а n_point_2 и n_point_3 это две промежуточные, положение которых, собственно, и определяется золотым сечением. - #Любые риды я при сборке даю Flye как "nano-raw", то есть нескорректированные риды Нанопора, потому что если мне нужно коллапсировать области генома с очень высокой гетерозиготностью, то для Mabs-flye это примерно эквивалентно тому, что в ридах много ошибок секвенирования. Нужно будет подумать, насколько это правильно. Скрипту calculate_AG.py я тоже даю любые риды как риды Нанопора, то есть с опцией --nanopore_reads. 
- n_point_1 = 0.0001 #Нижняя граница пробуемых max_divergence. 0.0001 это 0.01%. - n_point_4 = 0.5 #Верхняя граница пробуемых max_divergence. 0.5 это 50%. - n_point_2 = round(10**(math.log10(n_point_1) + ((math.sqrt(5) - 1) / (math.sqrt(5) + 1))*(math.log10(n_point_4) - math.log10(n_point_1))), 6) #округлю до шестого знака после запятой, иначе у Питона иногда вылезают числа вроде 0.0001442000001 - n_point_3 = round(10**(math.log10(n_point_4) - ((math.sqrt(5) - 1) / (math.sqrt(5) + 1))*(math.log10(n_point_4) - math.log10(n_point_1))), 6) - - #Для 0.0001 и 0.5 я не делаю измерений, потому что метод золотого сечения этого не требует. - #Это список, в который для каждого проверенного max_divergence будет записан AG. Ключ это max_divergence, а значение это AG. - d_max_divergence_to_AG = {} #Например, [0.123] = 762. - - #Анализирую вторую точку. - n_number_of_the_point_under_analysis = 1 - n_max_divergence = n_point_2 - - o_current_time_and_date = datetime.datetime.now() - s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Mabs-flye started to analyze point " + str(n_number_of_the_point_under_analysis) + " of 10. 
Max_divergence in this point is " + str(n_max_divergence) + "\n") - - #если пользователь не указывал размер генома - if s_genome_size_estimate == "auto": - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye --nano-raw " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --out-dir " + s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + " --threads " + str(n_number_of_cpu_threads_to_use) + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_max_divergence) + ",repeat_graph_ovlp_divergence=" + str(n_max_divergence) + ",assemble_divergence_relative=0 " + s_additional_flye_parameters) - #если пользователь указал размер генома + """ + Теперь нужно определить, какой опцией Flye давать риды (--nano-raw, --nano-corr или другие). Если пользователь хотя бы один файл дал в формате FASTA, то использую --nano-raw. Если все риды в формате FASTQ, то я делаю следующее: + I) Считаю точность для каждого рида, используя строку с качеством. "Точность" выражается в процентах. + II) Считаю медианное значение по значениям из "I)" + III) В завимисимости от значения из "II)" выбираю, какой опцией давать риды программе Flye. Задавая эти числа, я ориентировался на описания опций в https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md + Соответствие между медианной точностью ридов, и выбранным режимом Flye: + (0; 95] - --nano-raw + (95; 97] - --nano-hq + (97; 99] - --nano-corr + (99; 100] - --pacbio-hifi + Если среди файлов с ридами, которые пользователь дал программе, хотя бы один в формате FASTA, то Mabs пишет в логи "WARNING: you have provided reads in FASTA, while FASTQ is recommended. Using reads in FASTA may reduce the accuracy of the assembly." + + Для скорости, медианная точность считается только по ридам, относящимся к генам BUSCO. + """ + + s_flye_option_to_provide_reads_with = "" #может быть "--nano-raw", "--nano-hq", "--nano-corr", "--pacbio-hifi". 
+ + if not re.search(r"(\.fastq|\.fq|\.fastq\.gz|\.fq\.gz)$", s_path_to_all_long_reads_that_correspond_to_busco_genes, flags = re.IGNORECASE): + f_log.write("WARNING: you have provided reads in FASTA, while FASTQ is recommended. Using reads in FASTA may reduce the accuracy of the assembly.\n\n") + + s_flye_option_to_provide_reads_with = "--nano-raw" else: - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye --nano-raw " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --out-dir " + s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + " --threads " + str(n_number_of_cpu_threads_to_use) + " --genome-size " + s_genome_size_estimate + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_max_divergence) + ",repeat_graph_ovlp_divergence=" + str(n_max_divergence) + ",assemble_divergence_relative=0 " + s_additional_flye_parameters) + #Считаю медианный Phred score ридов. Считаю, что риды в формате Phred+33, потому что Phred+64 для длинных ридов, по-моему, никогда не использовался. - - #Смотрю, получился ли файл assembly.fasta. Его может не быть, если Flye не собрал ни одного дисджойнтига — в таком случае Flye прекращает работу преждевременно, не выдавая файла assembly.fasta. То, что Flye не выдал ни одного дисджойнтига, может быть связано с тем, что для ридов с большим количеством ошибок Mabs-flye попробовал очень маленький max_divergence. В случае, если файла assembly.fasta нет, я, даже не запуская скрипт calculate_AG.py, сразу считаю, что AG=0. - if not os.path.isfile(s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + "/assembly.fasta"): - n_AG_for_point_2 = 0 - else: - - #"--number_of_busco_orthogroups all" использую потому, что в папке BUSCO_dataset_to_use уже оставлены только те ортогруппы, которые нужно использовать. 
- os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_max_divergence_" + str(n_max_divergence) + " --assembly " + s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + "/assembly.fasta " + s_long_reads_option_for_calculate_AG + " " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe true --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) - - #Беру AG, посчитанный скриптом calculate_AG.py - if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_max_divergence_" + str(n_max_divergence) + "/AG.txt"): - f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_max_divergence_" + str(n_max_divergence) + "/AG.txt", "r") - s_line_1 = f_infile.readline() - #AG is 487 - o_regular_expression_results = re.search(r"AG is (\d+)", s_line_1) - n_AG_for_point_2 = int(o_regular_expression_results.group(1)) + if re.search(r"(\.gz)$", s_path_to_all_long_reads_that_correspond_to_busco_genes, flags = re.IGNORECASE): + f_infile = gzip.open(s_path_to_all_long_reads_that_correspond_to_busco_genes, mode = "rt") + else: + f_infile = open(s_path_to_all_long_reads_that_correspond_to_busco_genes, "r") + + #Список, элементы которого это точности ридов, выраженные в процентах. 100%, значит рид идеально точный. В списке по одному элементу на каждый рид. + l_accuracies_of_reads = [] + + n_line_number = 0 #Номер строки. Считается от 1. + for s_line in f_infile: + n_line_number += 1 + + if (n_line_number - 4) % 4 == 0: + s_quality_line = re.sub(r"[\r\n]+$", "", s_line) + + n_accuracy_of_the_read = 0 #Точность рида, в процентах. Сначала посчитаю просто как сумму точностей отдельных нуклеотидов, а потом поделю на длину рида. 
+ + for s_character in s_quality_line: + n_Phred_score_corresponding_to_the_character = ord(s_character) - 33 + n_accuracy_of_the_read += 100 * (1 - (10 ** (- n_Phred_score_corresponding_to_the_character / 10))) + + n_accuracy_of_the_read = n_accuracy_of_the_read / len(s_quality_line) + + l_accuracies_of_reads.append(n_accuracy_of_the_read) + + f_infile.close() + + n_median_accuracy_of_reads = statistics.median(l_accuracies_of_reads) + + if n_median_accuracy_of_reads <= 95: + s_flye_option_to_provide_reads_with = "--nano-raw" + elif (n_median_accuracy_of_reads > 95) and (n_median_accuracy_of_reads <= 97): + s_flye_option_to_provide_reads_with = "--nano-hq" + elif (n_median_accuracy_of_reads > 97) and (n_median_accuracy_of_reads <= 99): + s_flye_option_to_provide_reads_with = "--nano-corr" + elif n_median_accuracy_of_reads > 99: + s_flye_option_to_provide_reads_with = "--pacbio-hifi" else: - f_logs.write("Error. Couldn't calculate AG. See stderr and stdout for the reason why.") + o_current_time_and_date = datetime.datetime.now() + s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") + f_log.write(s_current_time_and_date + "\n") + f_log.write("A very strange error in parsing of FASTQ happened. Please, report at https://github.com/shelkmike/Mabs/issues .") sys.exit() + + #Округляю медианную точность, чтобы показать её пользователю. Если точность <=90%, то округляю до целого. Если точность от 90% до 99%, то округляю до первого знака после запятой. Если точность >99%, то округляю до второго знака после запятой. 
+ if n_median_accuracy_of_reads <= 90: + n_median_accuracy_of_reads__rounded = int(round(n_median_accuracy_of_reads, 0)) + elif (n_median_accuracy_of_reads > 90) and (n_median_accuracy_of_reads <= 99): + n_median_accuracy_of_reads__rounded = round(n_median_accuracy_of_reads, 1) + else: + n_median_accuracy_of_reads__rounded = round(n_median_accuracy_of_reads, 2) + + f_log.write("The median accuracy of reads has been estimated as approximately " + str(n_median_accuracy_of_reads__rounded) + "%. The reads will be provided to Flye via the option \"" + s_flye_option_to_provide_reads_with + "\".\n\n") + + + #Теперь делаю сборку с дефолтными значениями параметров Flye, чтобы посмотреть, какой порог по сходству при перекрытии ридов в процессе образования дисджойнтигов он установит. Делаю это во временной папке "Test_gene_assembly_to_determine_Flye_default_parameters". Заодно, посмотрю, какой repeat_graph_ovlp_divergence он установит (хотя это, в общем, можно определить и без сборки, а просто по значению s_flye_option_to_provide_reads_with) + n_max_divergence_between_reads_during_disjointig_construction__when_Flye_is_run_with_default_parameters = -100 #Значение этой переменной эквивалентно значению параметра assemble_ovlp_divergence, когда он используется в сочетании с "assemble_divergence_relative=0". -100 это плейсхолдер. + n_repeat_graph_ovlp_divergence__when_Flye_is_run_with_default_parameters = -100 #Значение параметра repeat_graph_ovlp_divergence. -100 это плейсхолдер - d_max_divergence_to_AG[n_max_divergence] = n_AG_for_point_2 - - o_current_time_and_date = datetime.datetime.now() - s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("AG for max_divergence " + str(n_max_divergence) + " is " + str(n_AG_for_point_2) + "\n\n") - - #Анализирую третью точку. 
- n_number_of_the_point_under_analysis += 1 - n_max_divergence = n_point_3 - o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Mabs-flye started to analyze point " + str(n_number_of_the_point_under_analysis) + " of 10. Max_divergence in this point is " + str(n_max_divergence) + "\n") - + f_log.write(s_current_time_and_date + "\n") + f_log.write("Mabs-flye started a test assembly to determine Flye default parameters\n\n") + #если пользователь не указывал размер генома if s_genome_size_estimate == "auto": - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye --nano-raw " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --out-dir " + s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + " --threads " + str(n_number_of_cpu_threads_to_use) + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_max_divergence) + ",repeat_graph_ovlp_divergence=" + str(n_max_divergence) + ",assemble_divergence_relative=0 " + s_additional_flye_parameters) + os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye " + s_flye_option_to_provide_reads_with + " " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --out-dir " + s_path_to_the_output_folder + "/Test_gene_assembly_to_determine_Flye_default_parameters --threads " + str(n_number_of_cpu_threads_to_use) + " --no-alt-contigs --stop-after assembly") #если пользователь указал размер генома else: - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye --nano-raw " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --out-dir " + s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + " --threads " + str(n_number_of_cpu_threads_to_use) + " --genome-size " + s_genome_size_estimate + " --no-alt-contigs 
--extra-params assemble_ovlp_divergence=" + str(n_max_divergence) + ",repeat_graph_ovlp_divergence=" + str(n_max_divergence) + ",assemble_divergence_relative=0 " + s_additional_flye_parameters) - - #Смотрю, получился ли файл assembly.fasta. Его может не быть, если Flye не собрал ни одного дисджойнтига — в таком случае Flye прекращает работу преждевременно, не выдавая файла assembly.fasta. То, что Flye не выдал ни одного дисджойнтига, может быть связано с тем, что для ридов с большим количеством ошибок Mabs-flye попробовал очень маленький max_divergence. В случае, если файла assembly.fasta нет, я, даже не запуская скрипт calculate_AG.py, сразу считаю, что AG=0. - if not os.path.isfile(s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + "/assembly.fasta"): - n_AG_for_point_3 = 0 - else: - #"--number_of_busco_orthogroups all" использую потому, что в папке BUSCO_dataset_to_use уже оставлены только те ортогруппы, которые нужно использовать. - os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_max_divergence_" + str(n_max_divergence) + " --assembly " + s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + "/assembly.fasta " + s_long_reads_option_for_calculate_AG + " " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe true --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) - - #Беру AG, посчитанный скриптом calculate_AG.py - if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_max_divergence_" + str(n_max_divergence) + "/AG.txt"): - f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_max_divergence_" + str(n_max_divergence) + "/AG.txt", "r") - s_line_1 = 
f_infile.readline() - #AG is 487 - o_regular_expression_results = re.search(r"AG is (\d+)", s_line_1) - n_AG_for_point_3 = int(o_regular_expression_results.group(1)) - else: - f_logs.write("Error. Couldn't calculate AG. See stderr and stdout for the reason why.") - sys.exit() + os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye " + s_flye_option_to_provide_reads_with + " " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --out-dir " + s_path_to_the_output_folder + "/Test_gene_assembly_to_determine_Flye_default_parameters --threads " + str(n_number_of_cpu_threads_to_use) + " --genome-size " + s_genome_size_estimate + " --no-alt-contigs --stop-after assembly") + + #Смотрю, какие значения n_max_divergence_between_reads_during_disjointig_construction и n_repeat_graph_ovlp_divergence Flye использовал, когда он запускался по умолчанию. + f_infile = open(s_path_to_the_output_folder + "/Test_gene_assembly_to_determine_Flye_default_parameters/flye.log", "r") + + for s_line in f_infile: + #[2023-07-10 10:52:57] DEBUG: Max divergence threshold set to 0.218233 + o_regular_expression_results = re.search(r"Max divergence threshold set to ([\d\.]+)", s_line) + if o_regular_expression_results: + n_max_divergence_between_reads_during_disjointig_construction__when_Flye_is_run_with_default_parameters = float(o_regular_expression_results.group(1)) + #[2023-07-10 19:24:50] DEBUG: repeat_graph_ovlp_divergence=0.08 + o_regular_expression_results = re.search(r"repeat_graph_ovlp_divergence=([\d\.]+)", s_line) + if o_regular_expression_results: + n_repeat_graph_ovlp_divergence__when_Flye_is_run_with_default_parameters = float(o_regular_expression_results.group(1)) + + f_infile.close() + + if (n_max_divergence_between_reads_during_disjointig_construction__when_Flye_is_run_with_default_parameters == -100) or (n_repeat_graph_ovlp_divergence__when_Flye_is_run_with_default_parameters == -100): + f_log.write("Error. 
Couldn't parse the results of Flye run with default parameters. If you don't know the cause of this, please report this problem to https://github.com/shelkmike/Mabs/issues\n") + sys.exit() + + n_number_of_the_point_under_analysis = 0 #порядковый номер точки, которую я анализирую. Считается от 1. + + #Тут я описываю функцию, которой на вход даются значения assemble_ovlp_divergence и repeat_graph_ovlp_divergence, а выдаёт функция -AG. С минусом спереди — потому, что scipy.optimize.minimize ищет минимум, а не максимум. Поэтому для максимизации AG нужно минимизировать -AG. Два параметра, дающихся на вход, я даю через список (потому что так нужно scipy.optimize.minimize). + def function_two_Flye_parameters_to_minus_AG(l_two_input_parameters): + global n_number_of_the_point_under_analysis #Без этой строки возникает ошибка "local variable 'n_number_of_the_point_under_analysis' referenced before assignment", потому что без этой строки нельзя модифицировать ("n_number_of_the_point_under_analysis += 1") глобальную переменную внутри функции. + + #Числа округляю до 5-го знака, чтобы не было всяких 0.12000000000000001 + n_assemble_ovlp_divergence = round(l_two_input_parameters[0], 5) + n_repeat_graph_ovlp_divergence = round(l_two_input_parameters[1], 5) - d_max_divergence_to_AG[n_max_divergence] = n_AG_for_point_3 - - o_current_time_and_date = datetime.datetime.now() - s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("AG for max_divergence " + str(n_max_divergence) + " is " + str(n_AG_for_point_3) + "\n\n") - - #теперь последовательно выбираю остальные 8 точек методом золотого сечения и меряю AG для них. - while n_number_of_the_point_under_analysis < 10: #"<", а не "<=", потому что увеличение номера точки здесь делается в начале цикла. n_number_of_the_point_under_analysis += 1 - #Смотрю, какая из двух центральных точек (вторая или третья) имеют меньшее значение AG. 
Если вторая имеет меньшее ли равное третьей, то выкидываю первую точку и сужаю интервал. Если третья имеет меньшее, чем вторая, то выкидываю четвёртую точку и сужаю интервал. При равных значениях выкидывыю левую, потому что равные значения могут быть из-за того, что в обеих точках AG = 0 из-за того, что при сборке по ридам с большим количеством ошибок обе центральные точки имели слишком маленький max_divergence, из-за чего Flye не смог собрать ни одного дисджойнтига и поэтому выдал пустой файл assembly.fasta. - if n_AG_for_point_2 <= n_AG_for_point_3: - n_point_1 = n_point_2 - n_point_2 = n_point_3 - #n_point_4 не меняется - n_point_3 = round(10**(math.log10(n_point_4) - ((math.sqrt(5) - 1) / (math.sqrt(5) + 1))*(math.log10(n_point_4) - math.log10(n_point_1))), 6) - - n_AG_for_point_1 = n_AG_for_point_2 - n_AG_for_point_2 = n_AG_for_point_3 - #n_AG_for_point_4 не меняется - n_AG_for_point_3 = -100 #плейсхолдер. Всё равно это значение я сейчас посчитаю. - - #Анализирую третью точку. - n_max_divergence = n_point_3 - - o_current_time_and_date = datetime.datetime.now() - s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Mabs-flye started to analyze point " + str(n_number_of_the_point_under_analysis) + " of 10. 
Max_divergence in this point is " + str(n_max_divergence) + "\n") - - #если пользователь не указывал размер генома - if s_genome_size_estimate == "auto": - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye --nano-raw " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --out-dir " + s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + " --threads " + str(n_number_of_cpu_threads_to_use) + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_max_divergence) + ",repeat_graph_ovlp_divergence=" + str(n_max_divergence) + ",assemble_divergence_relative=0 " + s_additional_flye_parameters) - #если пользователь указал размер генома - else: - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye --nano-raw " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --out-dir " + s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + " --threads " + str(n_number_of_cpu_threads_to_use) + " --genome-size " + s_genome_size_estimate + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_max_divergence) + ",repeat_graph_ovlp_divergence=" + str(n_max_divergence) + ",assemble_divergence_relative=0 " + s_additional_flye_parameters) - - #Смотрю, получился ли файл assembly.fasta. Его может не быть, если Flye не собрал ни одного дисджойнтига — в таком случае Flye прекращает работу преждевременно, не выдавая файла assembly.fasta. То, что Flye не выдал ни одного дисджойнтига, может быть связано с тем, что для ридов с большим количеством ошибок Mabs-flye попробовал очень маленький max_divergence. В случае, если файла assembly.fasta нет, я, даже не запуская скрипт calculate_AG.py, сразу считаю, что AG=0. 
- if not os.path.isfile(s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + "/assembly.fasta"): - n_AG_for_point_3 = 0 - else: - #"--number_of_busco_orthogroups all" использую потому, что в папке BUSCO_dataset_to_use уже оставлены только те ортогруппы, которые нужно использовать. - os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_max_divergence_" + str(n_max_divergence) + " --assembly " + s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + "/assembly.fasta " + s_long_reads_option_for_calculate_AG + " " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe true --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) - - #Беру AG, посчитанный скриптом calculate_AG.py - if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_max_divergence_" + str(n_max_divergence) + "/AG.txt"): - f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_max_divergence_" + str(n_max_divergence) + "/AG.txt", "r") - s_line_1 = f_infile.readline() - #AG is 487 - o_regular_expression_results = re.search(r"AG is (\d+)", s_line_1) - n_AG_for_point_3 = int(o_regular_expression_results.group(1)) - else: - f_logs.write("Error. Couldn't calculate AG. 
See stderr and stdout for the reason why.") - sys.exit() - - d_max_divergence_to_AG[n_max_divergence] = n_AG_for_point_3 - - o_current_time_and_date = datetime.datetime.now() - s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("AG for max_divergence " + str(n_max_divergence) + " is " + str(n_AG_for_point_3) + "\n\n") - - elif n_AG_for_point_2 > n_AG_for_point_3: - #n_point_1 не меняется - n_point_4 = n_point_3 - n_point_3 = n_point_2 - n_point_2 = round(10**(math.log10(n_point_1) + ((math.sqrt(5) - 1) / (math.sqrt(5) + 1))*(math.log10(n_point_4) - math.log10(n_point_1))), 6) - - #n_AG_for_point_1 не меняется - n_AG_for_point_4 = n_AG_for_point_3 - n_AG_for_point_3 = n_AG_for_point_2 - n_AG_for_point_2 = -100 #плейсхолдер. Всё равно это значение я сейчас посчитаю. - - #Анализирую вторую точку. - n_max_divergence = n_point_2 - - o_current_time_and_date = datetime.datetime.now() - s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Mabs-flye started to analyze point " + str(n_number_of_the_point_under_analysis) + " of 10. 
Max_divergence in this point is " + str(n_max_divergence) + "\n") - - #если пользователь не указывал размер генома - if s_genome_size_estimate == "auto": - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye --nano-raw " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --out-dir " + s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + " --threads " + str(n_number_of_cpu_threads_to_use) + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_max_divergence) + ",repeat_graph_ovlp_divergence=" + str(n_max_divergence) + ",assemble_divergence_relative=0 " + s_additional_flye_parameters) - #если пользователь указал размер генома - else: - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye --nano-raw " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --out-dir " + s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + " --threads " + str(n_number_of_cpu_threads_to_use) + " --genome-size " + s_genome_size_estimate + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_max_divergence) + ",repeat_graph_ovlp_divergence=" + str(n_max_divergence) + ",assemble_divergence_relative=0 " + s_additional_flye_parameters) - - #Смотрю, получился ли файл assembly.fasta. Его может не быть, если Flye не собрал ни одного дисджойнтига — в таком случае Flye прекращает работу преждевременно, не выдавая файла assembly.fasta. То, что Flye не выдал ни одного дисджойнтига, может быть связано с тем, что для ридов с большим количеством ошибок Mabs-flye попробовал очень маленький max_divergence. В случае, если файла assembly.fasta нет, я, даже не запуская скрипт calculate_AG.py, сразу считаю, что AG=0. 
- if not os.path.isfile(s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + "/assembly.fasta"): - n_AG_for_point_2 = 0 + o_current_time_and_date = datetime.datetime.now() + s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") + f_log.write(s_current_time_and_date + "\n") + f_log.write("Mabs-flye started to analyze point " + str(n_number_of_the_point_under_analysis) + ". This point is: assemble_ovlp_divergence = " + str(n_assemble_ovlp_divergence) + ", repeat_graph_ovlp_divergence = " + str(n_repeat_graph_ovlp_divergence) + ", assemble_divergence_relative = 0\n") + + #если пользователь не указывал размер генома + if s_genome_size_estimate == "auto": + os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye " + s_flye_option_to_provide_reads_with + " " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --out-dir " + s_path_to_the_output_folder + "/Gene_assembly_for_point_" + str(n_number_of_the_point_under_analysis) + " --threads " + str(n_number_of_cpu_threads_to_use) + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_assemble_ovlp_divergence) + ",repeat_graph_ovlp_divergence=" + str(n_repeat_graph_ovlp_divergence) + ",assemble_divergence_relative=0 " + " " + s_additional_flye_parameters) + #если пользователь указал размер генома + else: + os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye " + s_flye_option_to_provide_reads_with + " " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --out-dir " + s_path_to_the_output_folder + "/Gene_assembly_for_point_" + str(n_number_of_the_point_under_analysis) + " --threads " + str(n_number_of_cpu_threads_to_use) + " --genome-size " + s_genome_size_estimate + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_assemble_ovlp_divergence) + ",repeat_graph_ovlp_divergence=" + str(n_repeat_graph_ovlp_divergence) + ",assemble_divergence_relative=0 " + 
s_additional_flye_parameters) + + #Смотрю, получился ли файл assembly.fasta. Его может не быть, если Flye не собрал ни одного дисджойнтига — в таком случае Flye прекращает работу преждевременно, не выдавая файла assembly.fasta. То, что Flye не выдал ни одного дисджойнтига, может быть связано с тем, что для ридов с большим количеством ошибок Mabs-flye попробовал очень маленький assemble_ovlp_divergence. В случае, если файла assembly.fasta нет, я, даже не запуская скрипт calculate_AG.py, сразу считаю, что AG=0. + if not os.path.isfile(s_path_to_the_output_folder + "/Gene_assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta"): + n_AG = 0 + else: + #Считаю AG + os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + " --assembly " + s_path_to_the_output_folder + "/Gene_assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta " + s_long_reads_option_for_calculate_AG + " " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe true --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) + + #Беру AG, посчитанный скриптом calculate_AG.py + if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + "/AG.txt"): + f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + "/AG.txt", "r") + s_line_1 = f_infile.readline() + f_infile.close() + #AG is 487 + o_regular_expression_results = re.search(r"AG is (\d+)", s_line_1) + n_AG = int(o_regular_expression_results.group(1)) else: - #"--number_of_busco_orthogroups all" использую потому, что в папке 
BUSCO_dataset_to_use уже оставлены только те ортогруппы, которые нужно использовать. - os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_max_divergence_" + str(n_max_divergence) + " --assembly " + s_path_to_the_output_folder + "/Gene_assembly_for_max_divergence_" + str(n_max_divergence) + "/assembly.fasta " + s_long_reads_option_for_calculate_AG + " " + s_path_to_all_long_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe true --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) - - #Беру AG, посчитанный скриптом calculate_AG.py - if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_max_divergence_" + str(n_max_divergence) + "/AG.txt"): - f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_max_divergence_" + str(n_max_divergence) + "/AG.txt", "r") - s_line_1 = f_infile.readline() - #AG is 487 - o_regular_expression_results = re.search(r"AG is (\d+)", s_line_1) - n_AG_for_point_2 = int(o_regular_expression_results.group(1)) - else: - f_logs.write("Error. Couldn't calculate AG. See stderr and stdout for the reason why.") - - d_max_divergence_to_AG[n_max_divergence] = n_AG_for_point_2 - - o_current_time_and_date = datetime.datetime.now() - s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("AG for max_divergence " + str(n_max_divergence) + " is " + str(n_AG_for_point_2) + "\n\n") - + f_log.write("Error. Couldn't calculate AG. 
See stderr and stdout for the reason why.") + sys.exit() + + o_current_time_and_date = datetime.datetime.now() + s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") + f_log.write(s_current_time_and_date + "\n") + f_log.write("AG for point " + str(n_number_of_the_point_under_analysis) + " is " + str(n_AG) + "\n\n") + + return(-n_AG) + + #Теперь, собственно, делаю сборку, проверяя максимум n_maximum_number_of_points_to_try точек + + o_current_time_and_date = datetime.datetime.now() + s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") + f_log.write(s_current_time_and_date + "\n") + f_log.write("Mabs-flye will try at most " + str(n_maximum_number_of_points_to_try) + " points\n\n") - #После того, как посчитал AG для всех 10 точек, я смотрю, какая из них имела наибольший AG. Тот max_divergence, который соответствует этой точке, я и считаю оптимальным для сборки Flye. Если две точки дают одинаковый AG, то, для определённости, выбираю ту из них, которая имеет больший max_divergence. + """ + Максимальное возможное значение repeat_graph_ovlp_divergence ставлю в 0.5. Сомневаюсь, что могут быть полезны более высокие значение. Поэтому, чтобы Mabs-flye терял меньше времени на проверку малозначащих точек, поставил такое ограничение. 
+ """ + o_optimization_results = scipy.optimize.minimize(fun = function_two_Flye_parameters_to_minus_AG, x0 = [n_max_divergence_between_reads_during_disjointig_construction__when_Flye_is_run_with_default_parameters, n_repeat_graph_ovlp_divergence__when_Flye_is_run_with_default_parameters], method = "Nelder-Mead", bounds = ((0, 1), (0, 0.5)), options = {"maxfev" : n_maximum_number_of_points_to_try, "initial_simplex" : [[n_max_divergence_between_reads_during_disjointig_construction__when_Flye_is_run_with_default_parameters, n_repeat_graph_ovlp_divergence__when_Flye_is_run_with_default_parameters], [2 * n_max_divergence_between_reads_during_disjointig_construction__when_Flye_is_run_with_default_parameters, 2 * n_repeat_graph_ovlp_divergence__when_Flye_is_run_with_default_parameters], [n_max_divergence_between_reads_during_disjointig_construction__when_Flye_is_run_with_default_parameters, n_repeat_graph_ovlp_divergence__when_Flye_is_run_with_default_parameters / 2]]}) #Не понял, чем maxfev отличается от maxiter. Но оптимизация останавливается на n_maximum_number_of_points_to_try именно если указать это число как maxfev, а не как maxiter. - n_max_divergence_that_provides_maximum_AG = -100 - n_maximum_AG = -100 - for n_max_divergence in d_max_divergence_to_AG: - if d_max_divergence_to_AG[n_max_divergence] > n_maximum_AG: - n_max_divergence_that_provides_maximum_AG = n_max_divergence - n_maximum_AG = d_max_divergence_to_AG[n_max_divergence] - - if (d_max_divergence_to_AG[n_max_divergence] == n_maximum_AG) and (n_max_divergence > n_max_divergence_that_provides_maximum_AG): - n_max_divergence_that_provides_maximum_AG = n_max_divergence + n_optimal_assemble_ovlp_divergence = o_optimization_results.x[0] + n_optimal_repeat_graph_ovlp_divergence = o_optimization_results.x[1] + n_maximum_AG = - int(o_optimization_results.fun) #конвертирую в int, потому что scipy.optimize.minimize выдаёт это число во float (хоть это всегда и целый float). А в логи я его хочу записать в виде int. 
o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("The optimal max_divergence is " + str(n_max_divergence_that_provides_maximum_AG) + ". When assembling only genes, it provides AG = " + str(n_maximum_AG) + ". Now Mabs-flye starts to assemble the genome using all reads and max_divergence = " + str(n_max_divergence_that_provides_maximum_AG) + "\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("The optimal combination of Flye parameters as determined by Mabs-flye is: assemble_ovlp_divergence = " + str(n_optimal_assemble_ovlp_divergence) + ", repeat_graph_ovlp_divergence = " + str(n_optimal_repeat_graph_ovlp_divergence) + ", assemble_divergence_relative = 0. When assembling only genes, it provides AG = " + str(n_maximum_AG) + ". Now Mabs-flye starts to assemble the genome using all reads with the optimal combination of parameters.\n\n") - #Теперь делаю сборку Flye по всем ридам, используя n_max_divergence_that_provides_maximum_AG. + #Теперь делаю сборку Flye по всем ридам, используя найденную оптимальную комбинацию параметров. 
#если пользователь не указывал размер генома if s_genome_size_estimate == "auto": - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye --nano-raw " + s_path_to_the_file_with_all_long_reads + " --out-dir " + s_path_to_the_output_folder + "/The_best_assembly --threads " + str(n_number_of_cpu_threads_to_use) + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_max_divergence_that_provides_maximum_AG) + ",repeat_graph_ovlp_divergence=" + str(n_max_divergence_that_provides_maximum_AG) + ",assemble_divergence_relative=0 " + s_additional_flye_parameters) + os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye " + s_flye_option_to_provide_reads_with + " " + s_path_to_the_file_with_all_long_reads + " --out-dir " + s_path_to_the_output_folder + "/The_best_assembly --threads " + str(n_number_of_cpu_threads_to_use) + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_optimal_assemble_ovlp_divergence) + ",repeat_graph_ovlp_divergence=" + str(n_optimal_repeat_graph_ovlp_divergence) + ",assemble_divergence_relative=0 " + s_additional_flye_parameters) #если пользователь указал размер генома else: - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye --nano-raw " + s_path_to_the_file_with_all_long_reads + " --out-dir " + s_path_to_the_output_folder + "/The_best_assembly --threads " + str(n_number_of_cpu_threads_to_use) + " --genome-size " + s_genome_size_estimate + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_max_divergence_that_provides_maximum_AG) + ",repeat_graph_ovlp_divergence=" + str(n_max_divergence_that_provides_maximum_AG) + ",assemble_divergence_relative=0 " + s_additional_flye_parameters) + os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Flye/bin/flye " + s_flye_option_to_provide_reads_with + " " + s_path_to_the_file_with_all_long_reads + " --out-dir " + s_path_to_the_output_folder + "/The_best_assembly --threads " + 
str(n_number_of_cpu_threads_to_use) + " --genome-size " + s_genome_size_estimate + " --no-alt-contigs --extra-params assemble_ovlp_divergence=" + str(n_optimal_assemble_ovlp_divergence) + ",repeat_graph_ovlp_divergence=" + str(n_optimal_repeat_graph_ovlp_divergence) + ",assemble_divergence_relative=0 " + s_additional_flye_parameters) o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Mabs-flye finished. The contigs are in the file " + s_path_to_the_output_folder + "/The_best_assembly/assembly.fasta. Now I recommend to use a separate tool, for example HyPo (https://github.com/kensung-lab/hypo), to polish these contigs with accurate (HiFi, Illumina or MGI) reads.") + f_log.write(s_current_time_and_date + "\n") + f_log.write("Mabs-flye finished. The contigs are in the file " + s_path_to_the_output_folder + "/The_best_assembly/assembly.fasta. Now I recommend to use a separate tool, for example HyPo (https://github.com/kensung-lab/hypo), to polish these contigs with accurate (HiFi, Illumina or MGI) reads.") diff --git a/mabs-hifiasm.py b/mabs-hifiasm.py index 356058e..a63db78 100755 --- a/mabs-hifiasm.py +++ b/mabs-hifiasm.py @@ -19,6 +19,7 @@ import sys import os import re +import time import datetime import urllib.request #import ssl @@ -67,6 +68,9 @@ if not os.path.isfile(s_path_to_the_folder_where_Mabs_lies + "/Additional/get_single_end_reads_from_DIAMOND_results.py"): l_unavailable_files_and_folders.append("The file get_single_end_reads_from_DIAMOND_results.py should be in the subfolder \"Additional\" of the folder where Mabs lies.") + + if not os.path.isfile(s_path_to_the_folder_where_Mabs_lies + "/Additional/calculate_N50.py"): + l_unavailable_files_and_folders.append("The file calculate_N50.py should be in the subfolder \"Additional\" of the folder where Mabs lies.") if not 
os.path.isdir(s_path_to_the_folder_where_Mabs_lies + "/Test_datasets"): l_unavailable_files_and_folders.append("The subfolder \"Test_datasets\" should be in the folder where Mabs lies.") @@ -94,7 +98,7 @@ s_additional_hifiasm_parameters = "" #дополнительные параметры Hifiasm. - s_Mabs_version = "2.19" + s_Mabs_version = "2.24" l_errors_in_command_line = [] #список ошибок в командной строке. Если пользователь совершил много ошибок, то Mabs-hifiasm напишет про них все, а не только про первую встреченную. @@ -169,7 +173,7 @@ s_path_to_a_local_busco_dataset = s_path_to_the_output_folder + "/" + s_busco_dataset_name_online #путь к месту, где будет лежать скачанный архивированный gzip файл с датасетом BUSCO. - #проверяю, доступен ли адрес http://mikeshelk.site/Data/BUSCO_datasets/Latest/. Он может быть недоступен из-за каких-то проблем с сервером. Если не доступен, то рекомендую пользователю скачать базу с http://busco-data.ezlab.org/v5/data/lineages/ и использовать опцию --local_busco_dataset. Проверку делаю примерно как написано на https://stackoverflow.com/questions/1949318/checking-if-a-website-is-up-via-python . А если доступен, то делаю ещё одну проверку — на то, есть ли нужный файл в папке http://mikeshelk.site/Data/BUSCO_datasets/Latest/ + #проверяю, доступен ли адрес http://mikeshelk.site/Data/BUSCO_datasets/Latest/. Он может быть недоступен из-за каких-то проблем с сервером. Если не доступен, то пробую ещё два раза с интервалом в 5 секунд. Если адрес так и не станет доступным, то рекомендую пользователю скачать базу с http://busco-data.ezlab.org/v5/data/lineages/ и использовать опцию --local_busco_dataset. Проверку делаю примерно как написано на https://stackoverflow.com/questions/1949318/checking-if-a-website-is-up-via-python . 
А если доступен, то делаю ещё одну проверку — на то, есть ли нужный файл в папке http://mikeshelk.site/Data/BUSCO_datasets/Latest/ try: s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() #проверяю, доступен ли нужный файл, и если доступен, то качаю его. @@ -179,7 +183,27 @@ l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") except: - l_errors_in_command_line.append("Unfortunately, http://mikeshelk.site/Data/BUSCO_datasets/Latest/ is currently not accessible. To test Mabs-hifiasm, download the file http://busco-data.ezlab.org/v5/data/lineages/saccharomycetes_odb10.2020-08-05.tar.gz and run the following command:\nmabs-hifiasm.py --pacbio_hifi_reads [PATH TO THE FOLDER WITH MABS]/Test_datasets/pacbio_hifi_test_reads.fastq.gz --local_busco_dataset saccharomycetes_odb10.2020-08-05.tar.gz") + time.sleep(5) + try: + s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() + #проверяю, доступен ли нужный файл, и если доступен, то качаю его. + try: + urllib.request.urlretrieve("http://mikeshelk.site/Data/BUSCO_datasets/Latest/" + s_busco_dataset_name_online, s_path_to_a_local_busco_dataset) + except: + l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") + + except: + time.sleep(5) + try: + s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() + #проверяю, доступен ли нужный файл, и если доступен, то качаю его. 
+ try: + urllib.request.urlretrieve("http://mikeshelk.site/Data/BUSCO_datasets/Latest/" + s_busco_dataset_name_online, s_path_to_a_local_busco_dataset) + except: + l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") + + except: + l_errors_in_command_line.append("Unfortunately, http://mikeshelk.site/Data/BUSCO_datasets/Latest/ is currently not accessible. To test Mabs-hifiasm, download the file http://busco-data.ezlab.org/v5/data/lineages/saccharomycetes_odb10.2020-08-05.tar.gz and run the following command:\nmabs-hifiasm.py --pacbio_hifi_reads [PATH TO THE FOLDER WITH MABS]/Test_datasets/pacbio_hifi_test_reads.fastq.gz --local_busco_dataset saccharomycetes_odb10.2020-08-05.tar.gz") if len(l_errors_in_command_line) != 0: @@ -328,10 +352,9 @@ s_path_to_a_local_busco_dataset = s_path_to_the_output_folder + "/" + s_busco_dataset_name_online #путь к месту, где будет лежать скачанный архивированный gzip файл с датасетом BUSCO. - #проверяю, доступен ли адрес http://mikeshelk.site/Data/BUSCO_datasets/Latest/. Он может быть недоступен из-за каких-то проблем с сервером. Если не доступен, то рекомендую пользователю скачать базу с http://busco-data.ezlab.org/v5/data/lineages/ и использовать опцию --local_busco_dataset. Проверку делаю примерно как написано на https://stackoverflow.com/questions/1949318/checking-if-a-website-is-up-via-python . А если доступен, то делаю ещё одну проверку — на то, есть ли нужный файл в папке http://mikeshelk.site/Data/BUSCO_datasets/Latest/ + #проверяю, доступен ли адрес http://mikeshelk.site/Data/BUSCO_datasets/Latest/. Он может быть недоступен из-за каких-то проблем с сервером. Если не доступен, то пробую ещё два раза с интервалом в 5 секунд. Если адрес так и не станет доступным, то рекомендую пользователю скачать базу с http://busco-data.ezlab.org/v5/data/lineages/ и использовать опцию --local_busco_dataset. 
Проверку делаю примерно как написано на https://stackoverflow.com/questions/1949318/checking-if-a-website-is-up-via-python . А если доступен, то делаю ещё одну проверку — на то, есть ли нужный файл в папке http://mikeshelk.site/Data/BUSCO_datasets/Latest/ try: s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() - #проверяю, доступен ли нужный файл, и если доступен, то качаю его. try: urllib.request.urlretrieve("http://mikeshelk.site/Data/BUSCO_datasets/Latest/" + s_busco_dataset_name_online, s_path_to_a_local_busco_dataset) @@ -339,7 +362,27 @@ l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") except: - l_errors_in_command_line.append("http://mikeshelk.site/Data/BUSCO_datasets/Latest/ is not accessible. Please, download a BUSCO dataset from http://busco-data.ezlab.org/v5/data/lineages/ and use \"--local_busco_dataset\" instead of \"--download_busco_dataset\".") + time.sleep(5) + try: + s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() + #проверяю, доступен ли нужный файл, и если доступен, то качаю его. + try: + urllib.request.urlretrieve("http://mikeshelk.site/Data/BUSCO_datasets/Latest/" + s_busco_dataset_name_online, s_path_to_a_local_busco_dataset) + except: + l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") + + except: + time.sleep(5) + try: + s_dummy_variable = urllib.request.urlopen("http://mikeshelk.site/Data/BUSCO_datasets/Latest/").getcode() + #проверяю, доступен ли нужный файл, и если доступен, то качаю его. 
+ try: + urllib.request.urlretrieve("http://mikeshelk.site/Data/BUSCO_datasets/Latest/" + s_busco_dataset_name_online, s_path_to_a_local_busco_dataset) + except: + l_errors_in_command_line.append("The file " + s_busco_dataset_name_online + " does not exist at http://mikeshelk.site/Data/BUSCO_datasets/Latest/ .") + + except: + l_errors_in_command_line.append("http://mikeshelk.site/Data/BUSCO_datasets/Latest/ is not accessible. Please, download a BUSCO dataset from http://busco-data.ezlab.org/v5/data/lineages/ and use \"--local_busco_dataset\" instead of \"--download_busco_dataset\".") #если пользователь использовал --local_busco_dataset o_regular_expression_results = re.search(r" --local_busco_dataset (\S+)", s_command_line_reduced) @@ -460,13 +503,13 @@ sys.exit() - f_logs = open(s_path_to_the_output_folder + "/mabs_logs.txt","w",buffering=1) #f_logs это общий файл с логами Mabs-hifiasm, в отличие от трёх дополнительных файлов с логами, которые ведут три отдельных экземпляра Mabs-hifiasm. buffering=1 означает, что буферизация идёт только на уровне строк. + f_log = open(s_path_to_the_output_folder + "/mabs_log.txt","w",buffering=1) #f_log это общий файл с логами Mabs-hifiasm, в отличие от трёх дополнительных файлов с логами, которые ведут три отдельных экземпляра Mabs-hifiasm. buffering=1 означает, что буферизация идёт только на уровне строк. 
o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Started Mabs-hifiasm\n\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("Started Mabs-hifiasm\n\n") - f_logs.write("You have run Mabs-hifiasm of version " + s_Mabs_version + " with the following command: " + s_command_line + "\n\n") + f_log.write("You have run Mabs-hifiasm of version " + s_Mabs_version + " with the following command: " + s_command_line + "\n\n") #Это строка, в которой указаны пути ко всем ридам, которые нужно давать Modified_hifiasm, а также, если пользователь указал размер генома, то и размер генома. Например, "--hg-size 1g --h1 hic_reads_R1.fastq --hi2 hic_reads_R1.fastq --ul nanopore_reads.fastq hifi_reads.fastq", если был указан размер генома, и были указаны и риды Hi-C, и ультрадлинные риды Нанопора, и риды HiFi. Или, например, просто "hifi_reads.fastq" если размер генома не был указан, и были только риды HiFi. Эта строка нужна, чтобы Mabs-hifiasm было проще передавать аргументы командной строки Modified_hifiasm. Иначе, передача аргументов несколько осложнена, потому что в зависимости от того, какие опции дал Mabs-hifiasm пользователь, программа Modified_hifiasm нужно передавать разное количество аргументов. s_command_line_arguments_with_reads_for_Modified_hifiasm = s_path_to_pacbio_hifi_reads @@ -486,9 +529,9 @@ #если пользователь делает сборку тестового набора ридов Mabs-hifiasm, то нужно написать подробности этого тестового набора. 
if (len(sys.argv) == 2) and re.search(r"\s\-\-run_test", s_command_line): - f_logs.write("As a test, Mabs-hifiasm will assemble the first chromosome of Saccharomyces cerevisiae, which is approximately 200 kbp long, using 40x PacBio HiFi reads.\n\n") - f_logs.write("The command \"mabs-hifiasm.py --run_test\" is equivalent to the command \"mabs-hifiasm.py --pacbio_hifi_reads " + s_path_to_the_folder_where_Mabs_lies + "/Test_datasets/pacbio_hifi_reads__test_set__for_diploid_assembly.fastq.gz --download_busco_dataset saccharomycetes_odb10.2020-08-05.tar.gz\"\n") - f_logs.write("If after Mabs-hifiasm finishes you see a file ./Mabs_results/The_best_assembly/assembly.fasta which has a size of approximately 200 kilobytes, then the test succeeded.\n\n") + f_log.write("As a test, Mabs-hifiasm will assemble the first chromosome of Saccharomyces cerevisiae, which is approximately 200 kbp long, using 40x PacBio HiFi reads.\n\n") + f_log.write("The command \"mabs-hifiasm.py --run_test\" is equivalent to the command \"mabs-hifiasm.py --pacbio_hifi_reads " + s_path_to_the_folder_where_Mabs_lies + "/Test_datasets/pacbio_hifi_reads__test_set__for_diploid_assembly.fastq.gz --download_busco_dataset saccharomycetes_odb10.2020-08-05.tar.gz\"\n") + f_log.write("If after Mabs-hifiasm finishes you see a file ./Mabs_results/The_best_assembly/assembly.fasta which has a size of approximately 200 kilobytes, then the test succeeded.\n\n") #если пользователь сказал скачать файл с базой BUSCO или сам дал файл (но не папку), то разархивирую файл и меняю значение переменной s_path_to_a_local_busco_dataset с пути к файлу на путь к папке. if os.path.isfile(s_path_to_a_local_busco_dataset): @@ -500,7 +543,7 @@ s_path_to_a_local_busco_dataset = s_path_to_the_output_folder + "/" + s_busco_dataset_name #Оставляю из базы BUSCO только нужное количество (s_number_of_busco_orthogroups_to_use) ортогрупп — тех, которые имеют наиболее консервативные последовательности. 
Если пользователь указал использовать все ортогруппы, то Mabs-hifiasm использует все. Если пользователь указал больше ортогрупп, чем есть в этом наборе BUSCO, то Mabs-hifiasm использует все и пишет Warning в основной файл с логами. - mabs_function_preprocess_busco_dataset.function_preprocess_busco_dataset(s_path_to_a_local_busco_dataset, s_number_of_busco_orthogroups_to_use, s_path_to_the_output_folder, f_logs) + mabs_function_preprocess_busco_dataset.function_preprocess_busco_dataset(s_path_to_a_local_busco_dataset, s_number_of_busco_orthogroups_to_use, s_path_to_the_output_folder, f_log) #делаю ссылку на файл "ancestral", давая ему расширение .fasta. Затем делаю базу данных DIAMOND. #с помощью os.path.abspath() я получают абсолютный путь. Если он относительный, то это может создать проблемы в работоспособности мягкой ссылки. @@ -523,47 +566,55 @@ else: s_output_extension = "fasta" + #Проверяю, что DIAMOND выдал файл. Файла может не быть, если у DIAMOND были какие-то проблемы при запуске (см. https://github.com/shelkmike/Mabs/issues/3) + if not os.path.exists(s_path_to_the_output_folder + "/diamond_results_for_alignment_of_pacbio_hifi_reads_to_busco_proteins.txt"): + print("Mabs-hifiasm has stopped because there was an error during DIAMOND execution.") + sys.exit() + os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/Additional/get_single_end_reads_from_DIAMOND_results.py " + s_path_to_pacbio_hifi_reads + " " + s_path_to_the_output_folder + "/diamond_results_for_alignment_of_pacbio_hifi_reads_to_busco_proteins.txt " + s_path_to_the_output_folder + "/pacbio_hifi_reads_that_have_matches_to_busco_proteins." + s_output_extension) s_path_to_pacbio_hifi_reads_that_correspond_to_busco_genes = s_path_to_the_output_folder + "/pacbio_hifi_reads_that_have_matches_to_busco_proteins." + s_output_extension - #Теперь, собственно, начинаю проверку 10 точек методом золотого сечения. Параметр, который я оптимизирую, это параметр "-s" Hifiasm. 
Стартовый интервал -s: [0;1]. n_point_1 это самая левая в данный момент точка (то есть, с наименьшим -s), n_point_4 это самая правая (то есть, с наибольшим -s), а n_point_2 и n_point_3 это две промежуточные, положение которых, собственно, и определяется золотым сечением. - n_point_1 = 0 #Нижняя граница пробуемых -s. - n_point_4 = 1 #Верхняя граница пробуемых -s. - n_point_2 = round(n_point_1 + ((math.sqrt(5) - 1) / (math.sqrt(5) + 1))*(n_point_4 - n_point_1), 3) #округлю до третьего знака после запятой, иначе у Питона иногда вылезают числа вроде 0.144200000001 - n_point_3 = round(n_point_4 - ((math.sqrt(5) - 1) / (math.sqrt(5) + 1))*(n_point_4 - n_point_1), 3) + #Теперь, собственно, начинаю проверку 10 точек методом золотого сечения. Параметр, который я оптимизирую, это параметр "-s" Hifiasm. Стартовый интервал -s: [0;1]. n_golden_section_point_1 это самая левая в данный момент точка (то есть, с наименьшим -s), n_golden_section_point_4 это самая правая (то есть, с наибольшим -s), а n_golden_section_point_2 и n_golden_section_point_3 это две промежуточные, положение которых, собственно, и определяется золотым сечением. + #ВАЖНО: возможно, в будущем нужно изменить терминологию. Потому что сейчас можно перепутать обозначения четырёх точек метода золотого сечения и те "точки", которые в n_number_of_the_point_under_analysis + n_golden_section_point_1 = 0 #Нижняя граница пробуемых -s. + n_golden_section_point_4 = 1 #Верхняя граница пробуемых -s. 
+ n_golden_section_point_2 = round(n_golden_section_point_1 + ((math.sqrt(5) - 1) / (math.sqrt(5) + 1))*(n_golden_section_point_4 - n_golden_section_point_1), 3) #округлю до третьего знака после запятой, иначе у Питона иногда вылезают числа вроде 0.144200000001 + n_golden_section_point_3 = round(n_golden_section_point_4 - ((math.sqrt(5) - 1) / (math.sqrt(5) + 1))*(n_golden_section_point_4 - n_golden_section_point_1), 3) #Для 0 и 1 (двух крайних точек стартового интервала) я не делаю измерений, потому что метод золотого сечения этого не требует. #Это список, в который для каждого проверенного -s будет записан AG. Ключ это -s, а значение это AG. d_s_to_AG = {} #Например, [0.362] = 762. + #Это список, в который для каждого проверенного -s будет записан N50. Ключ это -s, а значение это N50. + d_s_to_N50 = {} #Например, [0.362] = 12345678. #Анализирую вторую точку. n_number_of_the_point_under_analysis = 1 - n_s = n_point_2 + n_s = n_golden_section_point_2 o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Mabs-hifiasm started to analyze point " + str(n_number_of_the_point_under_analysis) + " of 10. -s in this point is " + str(n_s) + "\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("Mabs-hifiasm started to analyze point " + str(n_number_of_the_point_under_analysis) + " of 10. -s in this point is " + str(n_s) + "\n") - os.mkdir(s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s)) + os.mkdir(s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis)) #Делаю сборку, после чего конвертирую файл p_ctg.gfa в FASTA, делая файл assembly.fasta . 
- os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Modified_hifiasm/modified_hifiasm -s " + str(n_s) + " -o " + s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly --only-primary --n-hap " + str(n_ploidy) + " -t " + str(n_number_of_cpu_threads_to_use) + " " + s_additional_hifiasm_parameters + " " + s_command_line_arguments_with_reads_for_Modified_hifiasm) + os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Modified_hifiasm/modified_hifiasm -s " + str(n_s) + " -o " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly --only-primary --n-hap " + str(n_ploidy) + " -t " + str(n_number_of_cpu_threads_to_use) + " " + s_additional_hifiasm_parameters + " " + s_command_line_arguments_with_reads_for_Modified_hifiasm) #Название выходного файла зависит от того, давал ли пользователь риды Hi-C или нет. #если пользователь не дал риды Hi-C if (s_path_to_hic_short_reads_R1 == ""): - s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.bp.p_ctg.gfa" + s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.bp.p_ctg.gfa" #если пользователь дал риды Hi-C. if (s_path_to_hic_short_reads_R1 != ""): - s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.hic.p_ctg.gfa" + s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.hic.p_ctg.gfa" #теперь из файла GFA с первичными контигами делаю файл FASTA с ними. 
f_infile = open(s_path_to_gfa_with_primary_contigs, "r") - f_outfile = open(s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.fasta", "w") + f_outfile = open(s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta", "w") for s_line in f_infile: #S ptg000001l AGTTTACGTTGAACAACCTCCAGGGTTTGT... o_regular_expression_results = re.search(r"^[sS]\s+(\S+)\s+(\S+)", s_line) @@ -572,58 +623,65 @@ f_infile.close() f_outfile.close() - s_path_to_the_last_assembly_folder = s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/" #путь к последней папке со сборкой. Нужен, чтобы из неё перемещать файлы с расширениями .bin и .utg в новую папку со сборкой. Их присутствие ускоряет сборку. + s_path_to_the_last_assembly_folder = s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/" #путь к последней папке со сборкой. Нужен, чтобы из неё перемещать файлы с расширениями .bin и .utg в новую папку со сборкой. Их присутствие ускоряет сборку. #"--number_of_busco_orthogroups all" использую потому, что в папке BUSCO_dataset_to_use уже оставлены только те ортогруппы, которые нужно использовать. 
- os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_-s_" + str(n_s) + " --assembly " + s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.fasta --pacbio_hifi_reads " + s_path_to_pacbio_hifi_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe false --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) + os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + " --assembly " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta --pacbio_hifi_reads " + s_path_to_pacbio_hifi_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe false --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) #Беру AG, посчитанный скриптом calculate_AG.py - if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_-s_" + str(n_s) + "/AG.txt"): - f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_-s_" + str(n_s) + "/AG.txt", "r") + if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + "/AG.txt"): + f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + "/AG.txt", "r") s_line_1 = f_infile.readline() + f_infile.close() #AG is 487 o_regular_expression_results = re.search(r"AG is (\d+)", s_line_1) - n_AG_for_point_2 = int(o_regular_expression_results.group(1)) + 
n_AG_for_golden_section_point_2 = int(o_regular_expression_results.group(1)) else: - f_logs.write("Error. Couldn't calculate AG. See stderr and stdout for the reason why.") + f_log.write("Error. Couldn't calculate AG. See stderr and stdout for the reason why.") sys.exit() - d_s_to_AG[n_s] = n_AG_for_point_2 + d_s_to_AG[n_s] = n_AG_for_golden_section_point_2 + + #Считаю N50 + s_command_output = subprocess.getoutput("python3 " + s_path_to_the_folder_where_Mabs_lies + "/Additional/calculate_N50.py " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta") #скрипт calculate_N50.py выдаёт N50. + s_command_output = re.sub(r"\n", r"", s_command_output) #удаляю символ переноса строки + n_N50_for_golden_section_point_2 = int(s_command_output) + d_s_to_N50[n_s] = n_N50_for_golden_section_point_2 o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("AG for -s " + str(n_s) + " is " + str(n_AG_for_point_2) + "\n\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("For -s = " + str(n_s) + ": AG = " + str(n_AG_for_golden_section_point_2) + " and N50 = " + str(n_N50_for_golden_section_point_2) + "\n\n") #Анализирую третью точку. n_number_of_the_point_under_analysis += 1 - n_s = n_point_3 + n_s = n_golden_section_point_3 o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Mabs-hifiasm started to analyze point " + str(n_number_of_the_point_under_analysis) + " of 10. -s in this point is " + str(n_s) + "\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("Mabs-hifiasm started to analyze point " + str(n_number_of_the_point_under_analysis) + " of 10. 
-s in this point is " + str(n_s) + "\n") - os.mkdir(s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s)) + os.mkdir(s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis)) #Делаю сборку, после чего конвертирую файл p_ctg.gfa в FASTA, делая файл assembly.fasta . #Перемещаю из прошлой папки со сборкой в эту файлы, названия которых имеют форму *.bin или *utg*. Присутствие этих файлов ускоряет сборку. - os.system("mv " + s_path_to_the_last_assembly_folder + "/*.bin " + s_path_to_the_last_assembly_folder + "/*utg* " + s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/") + os.system("mv " + s_path_to_the_last_assembly_folder + "/*.bin " + s_path_to_the_last_assembly_folder + "/*utg* " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/") - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Modified_hifiasm/modified_hifiasm -s " + str(n_s) + " -o " + s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly --only-primary --n-hap " + str(n_ploidy) + " -t " + str(n_number_of_cpu_threads_to_use) + " " + s_additional_hifiasm_parameters + " " + s_command_line_arguments_with_reads_for_Modified_hifiasm) + os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Modified_hifiasm/modified_hifiasm -s " + str(n_s) + " -o " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly --only-primary --n-hap " + str(n_ploidy) + " -t " + str(n_number_of_cpu_threads_to_use) + " " + s_additional_hifiasm_parameters + " " + s_command_line_arguments_with_reads_for_Modified_hifiasm) #Название выходного файла зависит от того, давал ли пользователь риды Hi-C или нет. 
#если пользователь не дал риды Hi-C if (s_path_to_hic_short_reads_R1 == ""): - s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.bp.p_ctg.gfa" + s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.bp.p_ctg.gfa" #если пользователь дал риды Hi-C. if (s_path_to_hic_short_reads_R1 != ""): - s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.hic.p_ctg.gfa" + s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.hic.p_ctg.gfa" #теперь из файла GFA с первичными контигами делаю файл FASTA с ними. f_infile = open(s_path_to_gfa_with_primary_contigs, "r") - f_outfile = open(s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.fasta", "w") + f_outfile = open(s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta", "w") for s_line in f_infile: #S ptg000001l AGTTTACGTTGAACAACCTCCAGGGTTTGT... o_regular_expression_results = re.search(r"^[sS]\s+(\S+)\s+(\S+)", s_line) @@ -632,73 +690,85 @@ f_infile.close() f_outfile.close() - s_path_to_the_last_assembly_folder = s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/" #путь к последней папке со сборкой. Нужен, чтобы перемещать из неё в новую папку со сборкой файлы, названия которых имеют форму *.bin или *utg*. Присутствие этих файлов ускоряет сборку. + s_path_to_the_last_assembly_folder = s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/" #путь к последней папке со сборкой. Нужен, чтобы перемещать из неё в новую папку со сборкой файлы, названия которых имеют форму *.bin или *utg*. Присутствие этих файлов ускоряет сборку. 
#"--number_of_busco_orthogroups all" использую потому, что в папке BUSCO_dataset_to_use уже оставлены только те ортогруппы, которые нужно использовать. - os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_-s_" + str(n_s) + " --assembly " + s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.fasta --pacbio_hifi_reads " + s_path_to_pacbio_hifi_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe false --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) + os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + " --assembly " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta --pacbio_hifi_reads " + s_path_to_pacbio_hifi_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe false --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) #Беру AG, посчитанный скриптом calculate_AG.py - if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_-s_" + str(n_s) + "/AG.txt"): - f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_-s_" + str(n_s) + "/AG.txt", "r") + if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + "/AG.txt"): + f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + "/AG.txt", "r") s_line_1 = f_infile.readline() + 
f_infile.close() #AG is 487 o_regular_expression_results = re.search(r"AG is (\d+)", s_line_1) - n_AG_for_point_3 = int(o_regular_expression_results.group(1)) + n_AG_for_golden_section_point_3 = int(o_regular_expression_results.group(1)) else: - f_logs.write("Error. Couldn't calculate AG. See stderr and stdout for the reason why.") + f_log.write("Error. Couldn't calculate AG. See stderr and stdout for the reason why.") sys.exit() - d_s_to_AG[n_s] = n_AG_for_point_3 + d_s_to_AG[n_s] = n_AG_for_golden_section_point_3 + + #Считаю N50 + s_command_output = subprocess.getoutput("python3 " + s_path_to_the_folder_where_Mabs_lies + "/Additional/calculate_N50.py " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta") #скрипт calculate_N50.py выдаёт N50. + s_command_output = re.sub(r"\n", r"", s_command_output) #удаляю символ переноса строки + n_N50_for_golden_section_point_3 = int(s_command_output) + d_s_to_N50[n_s] = n_N50_for_golden_section_point_3 o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("AG for -s " + str(n_s) + " is " + str(n_AG_for_point_3) + "\n\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("For -s = " + str(n_s) + ": AG = " + str(n_AG_for_golden_section_point_3) + " and N50 = " + str(n_N50_for_golden_section_point_3) + "\n\n") #теперь последовательно выбираю остальные 8 точек методом золотого сечения и меряю AG для них. while n_number_of_the_point_under_analysis < 10: #"<", а не "<=", потому что увеличение номера точки здесь делается в начале цикла. n_number_of_the_point_under_analysis += 1 - #Смотрю, какая из двух центральных точек (вторая или третья) имеют меньшее значение AG. Если вторая имеет меньшее ли равное третьей, то выкидываю первую точку и сужаю интервал. 
Если третья имеет меньшее, чем вторая, то выкидываю четвёртую точку и сужаю интервал. При равных значениях выкидывыю правую. Правую выкидываю потому, что по моим впечатлениям оптимальный -s чаще бывает ближе к 0, чем к 1. - if n_AG_for_point_2 < n_AG_for_point_3: - n_point_1 = n_point_2 - n_point_2 = n_point_3 - #n_point_4 не меняется - n_point_3 = round((n_point_4 - ((math.sqrt(5) - 1) / (math.sqrt(5) + 1))*(n_point_4 - n_point_1)), 3) + #Смотрю, какая из двух центральных точек (вторая или третья) имеют меньшее значение AG. Если вторая имеет меньшее, то выкидываю первую точку и сужаю интервал. Если третья имеет меньшее, чем вторая, то выкидываю четвёртую точку и сужаю интервал. При равных значениях AG делаю такое же сравнение, но для N50. Если и N50 равные, то выкидываю правую точку. Правую выкидываю потому, что по моим впечатлениям оптимальный -s чаще бывает ближе к 0, чем к 1. + if (n_AG_for_golden_section_point_2 < n_AG_for_golden_section_point_3) or ((n_AG_for_golden_section_point_2 == n_AG_for_golden_section_point_3) and (n_N50_for_golden_section_point_2 < n_N50_for_golden_section_point_3)): + n_golden_section_point_1 = n_golden_section_point_2 + n_golden_section_point_2 = n_golden_section_point_3 + #n_golden_section_point_4 не меняется + n_golden_section_point_3 = round((n_golden_section_point_4 - ((math.sqrt(5) - 1) / (math.sqrt(5) + 1))*(n_golden_section_point_4 - n_golden_section_point_1)), 3) + + n_AG_for_golden_section_point_1 = n_AG_for_golden_section_point_2 + n_N50_for_golden_section_point_1 = n_N50_for_golden_section_point_2 + + n_AG_for_golden_section_point_2 = n_AG_for_golden_section_point_3 + n_N50_for_golden_section_point_2 = n_N50_for_golden_section_point_3 - n_AG_for_point_1 = n_AG_for_point_2 - n_AG_for_point_2 = n_AG_for_point_3 - #n_AG_for_point_4 не меняется - n_AG_for_point_3 = -100 #плейсхолдер. Всё равно это значение я сейчас посчитаю. 
+ #n_AG_for_golden_section_point_4 и n_N50_for_golden_section_point_4 не меняется + n_AG_for_golden_section_point_3 = -100 #плейсхолдер. Всё равно это значение я сейчас посчитаю. + n_N50_for_golden_section_point_3 = -100 #плейсхолдер. Всё равно это значение я сейчас посчитаю. #Анализирую третью точку. - n_s = n_point_3 + n_s = n_golden_section_point_3 o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Mabs-hifiasm started to analyze point " + str(n_number_of_the_point_under_analysis) + " of 10. -s in this point is " + str(n_s) + "\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("Mabs-hifiasm started to analyze point " + str(n_number_of_the_point_under_analysis) + " of 10. -s in this point is " + str(n_s) + "\n") - os.mkdir(s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s)) + os.mkdir(s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis)) #Делаю сборку, после чего конвертирую файл p_ctg.gfa в FASTA, делая файл assembly.fasta . #Перемещаю из прошлой папки со сборкой в эту файлы, названия которых имеют форму *.bin или *utg*. Присутствие этих файлов ускоряет сборку. 
- os.system("mv " + s_path_to_the_last_assembly_folder + "/*.bin " + s_path_to_the_last_assembly_folder + "/*utg* " + s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/") + os.system("mv " + s_path_to_the_last_assembly_folder + "/*.bin " + s_path_to_the_last_assembly_folder + "/*utg* " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/") - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Modified_hifiasm/modified_hifiasm -s " + str(n_s) + " -o " + s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly --only-primary --n-hap " + str(n_ploidy) + " -t " + str(n_number_of_cpu_threads_to_use) + " " + s_additional_hifiasm_parameters + " " + s_command_line_arguments_with_reads_for_Modified_hifiasm) + os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Modified_hifiasm/modified_hifiasm -s " + str(n_s) + " -o " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly --only-primary --n-hap " + str(n_ploidy) + " -t " + str(n_number_of_cpu_threads_to_use) + " " + s_additional_hifiasm_parameters + " " + s_command_line_arguments_with_reads_for_Modified_hifiasm) #Название выходного файла зависит от того, давал ли пользователь риды Hi-C или нет. #если пользователь не дал риды Hi-C if (s_path_to_hic_short_reads_R1 == ""): - s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.bp.p_ctg.gfa" + s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.bp.p_ctg.gfa" #если пользователь дал риды Hi-C. 
if (s_path_to_hic_short_reads_R1 != ""): - s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.hic.p_ctg.gfa" + s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.hic.p_ctg.gfa" #теперь из файла GFA с первичными контигами делаю файл FASTA с ними. f_infile = open(s_path_to_gfa_with_primary_contigs, "r") - f_outfile = open(s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.fasta", "w") + f_outfile = open(s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta", "w") for s_line in f_infile: #S ptg000001l AGTTTACGTTGAACAACCTCCAGGGTTTGT... o_regular_expression_results = re.search(r"^[sS]\s+(\S+)\s+(\S+)", s_line) @@ -707,68 +777,80 @@ f_infile.close() f_outfile.close() - s_path_to_the_last_assembly_folder = s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/" #путь к последней папке со сборкой. Нужен, чтобы перемещать из неё в новую папку со сборкой файлы, названия которых имеют форму *.bin или *utg*. Присутствие этих файлов ускоряет сборку. + s_path_to_the_last_assembly_folder = s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/" #путь к последней папке со сборкой. Нужен, чтобы перемещать из неё в новую папку со сборкой файлы, названия которых имеют форму *.bin или *utg*. Присутствие этих файлов ускоряет сборку. #"--number_of_busco_orthogroups all" использую потому, что в папке BUSCO_dataset_to_use уже оставлены только те ортогруппы, которые нужно использовать. 
- os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_-s_" + str(n_s) + " --assembly " + s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.fasta --pacbio_hifi_reads " + s_path_to_pacbio_hifi_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe false --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) + os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + " --assembly " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta --pacbio_hifi_reads " + s_path_to_pacbio_hifi_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe false --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) #Беру AG, посчитанный скриптом calculate_AG.py - if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_-s_" + str(n_s) + "/AG.txt"): - f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_-s_" + str(n_s) + "/AG.txt", "r") + if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + "/AG.txt"): + f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + "/AG.txt", "r") s_line_1 = f_infile.readline() + f_infile.close() #AG is 487 o_regular_expression_results = re.search(r"AG is (\d+)", s_line_1) - n_AG_for_point_3 = int(o_regular_expression_results.group(1)) + 
n_AG_for_golden_section_point_3 = int(o_regular_expression_results.group(1)) else: - f_logs.write("Error. Couldn't calculate AG. See stderr and stdout for the reason why.") + f_log.write("Error. Couldn't calculate AG. See stderr and stdout for the reason why.") sys.exit() - d_s_to_AG[n_s] = n_AG_for_point_3 + d_s_to_AG[n_s] = n_AG_for_golden_section_point_3 + + #Считаю N50 + s_command_output = subprocess.getoutput("python3 " + s_path_to_the_folder_where_Mabs_lies + "/Additional/calculate_N50.py " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta") #скрипт calculate_N50.py выдаёт N50. + s_command_output = re.sub(r"\n", r"", s_command_output) #удаляю символ переноса строки + n_N50_for_golden_section_point_3 = int(s_command_output) + d_s_to_N50[n_s] = n_N50_for_golden_section_point_3 o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("AG for -s " + str(n_s) + " is " + str(n_AG_for_point_3) + "\n\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("For -s = " + str(n_s) + ": AG = " + str(n_AG_for_golden_section_point_3) + " and N50 = " + str(n_N50_for_golden_section_point_3) + "\n\n") + + elif (n_AG_for_golden_section_point_2 > n_AG_for_golden_section_point_3) or ((n_AG_for_golden_section_point_2 == n_AG_for_golden_section_point_3) and (n_N50_for_golden_section_point_2 >= n_N50_for_golden_section_point_3)): + #n_golden_section_point_1 не меняется + n_golden_section_point_4 = n_golden_section_point_3 + n_golden_section_point_3 = n_golden_section_point_2 + n_golden_section_point_2 = round((n_golden_section_point_1 + ((math.sqrt(5) - 1) / (math.sqrt(5) + 1))*(n_golden_section_point_4 - n_golden_section_point_1)), 3) - elif n_AG_for_point_2 >= n_AG_for_point_3: - #n_point_1 не меняется - n_point_4 = n_point_3 - n_point_3 = n_point_2 - n_point_2 = 
round((n_point_1 + ((math.sqrt(5) - 1) / (math.sqrt(5) + 1))*(n_point_4 - n_point_1)), 3) + #n_AG_for_golden_section_point_1 и n_N50_for_golden_section_point_1 не меняются + n_AG_for_golden_section_point_4 = n_AG_for_golden_section_point_3 + n_N50_for_golden_section_point_4 = n_N50_for_golden_section_point_3 - #n_AG_for_point_1 не меняется - n_AG_for_point_4 = n_AG_for_point_3 - n_AG_for_point_3 = n_AG_for_point_2 - n_AG_for_point_2 = -100 #плейсхолдер. Всё равно это значение я сейчас посчитаю. + n_AG_for_golden_section_point_3 = n_AG_for_golden_section_point_2 + n_N50_for_golden_section_point_3 = n_N50_for_golden_section_point_2 + + n_AG_for_golden_section_point_2 = -100 #плейсхолдер. Всё равно это значение я сейчас посчитаю. + n_N50_for_golden_section_point_2 = -100 #плейсхолдер. Всё равно это значение я сейчас посчитаю. #Анализирую вторую точку. - n_s = n_point_2 + n_s = n_golden_section_point_2 o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Mabs-hifiasm started to analyze point " + str(n_number_of_the_point_under_analysis) + " of 10. -s in this point is " + str(n_s) + "\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("Mabs-hifiasm started to analyze point " + str(n_number_of_the_point_under_analysis) + " of 10. -s in this point is " + str(n_s) + "\n") - os.mkdir(s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s)) + os.mkdir(s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis)) #Делаю сборку, после чего конвертирую файл p_ctg.gfa в FASTA, делая файл assembly.fasta . #Перемещаю из прошлой папки со сборкой в эту файлы, названия которых имеют форму *.bin или *utg*. Присутствие этих файлов ускоряет сборку. 
- os.system("mv " + s_path_to_the_last_assembly_folder + "/*.bin " + s_path_to_the_last_assembly_folder + "/*utg* " + s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/") + os.system("mv " + s_path_to_the_last_assembly_folder + "/*.bin " + s_path_to_the_last_assembly_folder + "/*utg* " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/") - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Modified_hifiasm/modified_hifiasm -s " + str(n_s) + " -o " + s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly --only-primary --n-hap " + str(n_ploidy) + " -t " + str(n_number_of_cpu_threads_to_use) + " " + s_additional_hifiasm_parameters + " " + s_command_line_arguments_with_reads_for_Modified_hifiasm) + os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Modified_hifiasm/modified_hifiasm -s " + str(n_s) + " -o " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly --only-primary --n-hap " + str(n_ploidy) + " -t " + str(n_number_of_cpu_threads_to_use) + " " + s_additional_hifiasm_parameters + " " + s_command_line_arguments_with_reads_for_Modified_hifiasm) #Название выходного файла зависит от того, давал ли пользователь риды Hi-C или нет. #если пользователь не дал риды Hi-C if (s_path_to_hic_short_reads_R1 == ""): - s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.bp.p_ctg.gfa" + s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.bp.p_ctg.gfa" #если пользователь дал риды Hi-C. 
if (s_path_to_hic_short_reads_R1 != ""): - s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.hic.p_ctg.gfa" + s_path_to_gfa_with_primary_contigs = s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.hic.p_ctg.gfa" #теперь из файла GFA с первичными контигами делаю файл FASTA с ними. f_infile = open(s_path_to_gfa_with_primary_contigs, "r") - f_outfile = open(s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.fasta", "w") + f_outfile = open(s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta", "w") for s_line in f_infile: #S ptg000001l AGTTTACGTTGAACAACCTCCAGGGTTTGT... o_regular_expression_results = re.search(r"^[sS]\s+(\S+)\s+(\S+)", s_line) @@ -778,48 +860,63 @@ f_outfile.close() #"--number_of_busco_orthogroups all" использую потому, что в папке BUSCO_dataset_to_use уже оставлены только те ортогруппы, которые нужно использовать. 
- os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_-s_" + str(n_s) + " --assembly " + s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/assembly.fasta --pacbio_hifi_reads " + s_path_to_pacbio_hifi_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe false --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) + os.system("python3 " + s_path_to_the_folder_where_Mabs_lies + "/calculate_AG.py --output_folder " + s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + " --assembly " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta --pacbio_hifi_reads " + s_path_to_pacbio_hifi_reads_that_correspond_to_busco_genes + " --number_of_busco_orthogroups all --local_busco_dataset " + s_path_to_the_output_folder + "/BUSCO_dataset_to_use --use_proovframe false --max_intron_length " + s_maximum_allowed_intron_length + " --threads " + str(n_number_of_cpu_threads_to_use)) - s_path_to_the_last_assembly_folder = s_path_to_the_output_folder + "/Assembly_for_-s_" + str(n_s) + "/" #путь к последней папке со сборкой. Нужен, чтобы перемещать из неё в новую папку со сборкой файлы, названия которых имеют форму *.bin или *utg*. Присутствие этих файлов ускоряет сборку. + s_path_to_the_last_assembly_folder = s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/" #путь к последней папке со сборкой. Нужен, чтобы перемещать из неё в новую папку со сборкой файлы, названия которых имеют форму *.bin или *utg*. Присутствие этих файлов ускоряет сборку. 
#Беру AG, посчитанный скриптом calculate_AG.py - if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_-s_" + str(n_s) + "/AG.txt"): - f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_-s_" + str(n_s) + "/AG.txt", "r") + if os.path.isfile(s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + "/AG.txt"): + f_infile = open(s_path_to_the_output_folder + "/AG_calculation_for_point_" + str(n_number_of_the_point_under_analysis) + "/AG.txt", "r") s_line_1 = f_infile.readline() + f_infile.close() #AG is 487 o_regular_expression_results = re.search(r"AG is (\d+)", s_line_1) - n_AG_for_point_2 = int(o_regular_expression_results.group(1)) + n_AG_for_golden_section_point_2 = int(o_regular_expression_results.group(1)) else: - f_logs.write("Error. Couldn't calculate AG. See stderr and stdout for the reason why.") + f_log.write("Error. Couldn't calculate AG. See stderr and stdout for the reason why.") + + d_s_to_AG[n_s] = n_AG_for_golden_section_point_2 - d_s_to_AG[n_s] = n_AG_for_point_2 + #Считаю N50 + s_command_output = subprocess.getoutput("python3 " + s_path_to_the_folder_where_Mabs_lies + "/Additional/calculate_N50.py " + s_path_to_the_output_folder + "/Assembly_for_point_" + str(n_number_of_the_point_under_analysis) + "/assembly.fasta") #скрипт calculate_N50.py выдаёт N50. 
+ s_command_output = re.sub(r"\n", r"", s_command_output) #удаляю символ переноса строки + n_N50_for_golden_section_point_2 = int(s_command_output) + d_s_to_N50[n_s] = n_N50_for_golden_section_point_2 o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("AG for -s " + str(n_s) + " is " + str(n_AG_for_point_2) + "\n\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("For -s = " + str(n_s) + ": AG = " + str(n_AG_for_golden_section_point_2) + " and N50 = " + str(n_N50_for_golden_section_point_2) + "\n\n") - #После того, как посчитал AG для всех 10 точек, я смотрю, какая из них дала лучший AG. После этого делаю сборку для этого значения "-s", но на этот раз без использования параметра Modified_hifiasm "--only-primary", потому что тут я хочу сделать все файлы, в том числе файлы с фазированной сборкой — может быть, они будут полезны пользователю. Если две точки дают одинаковый AG, то, для определённости, выбираю ту из них, которая имеет меньший -s. - n_s_that_provides_maximum_AG = -100 - n_maximum_AG = -100 + + #После того, как посчитал AG для всех 10 точек, я смотрю, какая из них дала наибольший AG. Если наибольший AG принадлежит сразу нескольким точкам, то выбираю ту из них, которая дала больший N50. Если несколько точек дают одинаковый AG и N50, то, для определённости, выбираю ту из них, которая имеет меньший -s. После этого делаю сборку для этого значения "-s", но на этот раз без использования параметра Modified_hifiasm "--only-primary", потому что тут я хочу сделать все файлы, в том числе файлы с фазированной сборкой — может быть, они будут полезны пользователю. 
+ + n_s_that_makes_the_best_assembly = -100 + n_AG_in_the_best_point = -100 + n_N50_in_the_best_point = -100 for n_s in d_s_to_AG: - if d_s_to_AG[n_s] > n_maximum_AG: - n_s_that_provides_maximum_AG = n_s - n_maximum_AG = d_s_to_AG[n_s] - - if (d_s_to_AG[n_s] == n_maximum_AG) and (n_s < n_s_that_provides_maximum_AG): - n_s_that_provides_maximum_AG = n_s + if d_s_to_AG[n_s] > n_AG_in_the_best_point: + n_s_that_makes_the_best_assembly = n_s + n_AG_in_the_best_point = d_s_to_AG[n_s] + n_N50_in_the_best_point = d_s_to_N50[n_s] + elif (d_s_to_AG[n_s] == n_AG_in_the_best_point) and (d_s_to_N50[n_s] > n_N50_in_the_best_point): + n_s_that_makes_the_best_assembly = n_s + n_AG_in_the_best_point = d_s_to_AG[n_s] + n_N50_in_the_best_point = d_s_to_N50[n_s] + elif (d_s_to_AG[n_s] == n_AG_in_the_best_point) and (d_s_to_N50[n_s] == n_N50_in_the_best_point) and (n_s < n_s_that_makes_the_best_assembly): + + n_s_that_makes_the_best_assembly = n_s o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("The optimal -s is " + str(n_s_that_provides_maximum_AG) + ", it provides AG = " + str(n_maximum_AG) + ". Now performing a full assembly for this value of -s.\n\n") + f_log.write(s_current_time_and_date + "\n") + f_log.write("The optimal -s is " + str(n_s_that_makes_the_best_assembly) + ", it provides AG = " + str(n_AG_in_the_best_point) + " and N50 = " + str(n_N50_in_the_best_point) + ". Now performing a full assembly for this value of -s.\n\n") os.mkdir(s_path_to_the_output_folder + "/The_best_assembly") #Перемещаю из прошлой папки со сборкой в эту файлы, названия которых имеют форму *.bin или *utg*. Присутствие этих файлов ускоряет сборку. 
os.system("mv " + s_path_to_the_last_assembly_folder + "/*.bin " + s_path_to_the_last_assembly_folder + "/*utg* " + s_path_to_the_output_folder + "/The_best_assembly/") - os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Modified_hifiasm/modified_hifiasm -s " + str(n_s_that_provides_maximum_AG) + " -o " + s_path_to_the_output_folder + "/The_best_assembly/assembly --n-hap " + str(n_ploidy) + " -t " + str(n_number_of_cpu_threads_to_use) + " " + s_additional_hifiasm_parameters + " " + s_command_line_arguments_with_reads_for_Modified_hifiasm) + os.system(s_path_to_the_folder_where_Mabs_lies + "/Additional/Modified_hifiasm/modified_hifiasm -s " + str(n_s_that_makes_the_best_assembly) + " -o " + s_path_to_the_output_folder + "/The_best_assembly/assembly --n-hap " + str(n_ploidy) + " -t " + str(n_number_of_cpu_threads_to_use) + " " + s_additional_hifiasm_parameters + " " + s_command_line_arguments_with_reads_for_Modified_hifiasm) #Название выходного файла зависит от того, давал ли пользователь риды Hi-C или нет. #если пользователь не дал риды Hi-C @@ -842,8 +939,8 @@ o_current_time_and_date = datetime.datetime.now() s_current_time_and_date = o_current_time_and_date.strftime("%H:%M:%S %Y-%m-%d") - f_logs.write(s_current_time_and_date + "\n") - f_logs.write("Mabs-hifiasm finished. The contigs are in the file " + s_path_to_the_output_folder + "/The_best_assembly/assembly.fasta") + f_log.write(s_current_time_and_date + "\n") + f_log.write("Mabs-hifiasm finished. The contigs are in the file " + s_path_to_the_output_folder + "/The_best_assembly/assembly.fasta")