From c525fd77f16938188c8d8474915cf4791a99eb85 Mon Sep 17 00:00:00 2001 From: Frederic Lemoine Date: Mon, 24 Aug 2020 15:53:56 +0200 Subject: [PATCH] Added options --rm-quotes and --add-quotes to gotree rename #9 --- cmd/rename.go | 31 +++++++++++---- docs/commands/rename.md | 12 +++++- tree/tree.go | 84 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 115 insertions(+), 12 deletions(-) diff --git a/cmd/rename.go b/cmd/rename.go index 19b5808..920a149 100644 --- a/cmd/rename.go +++ b/cmd/rename.go @@ -14,6 +14,8 @@ var autorename bool var autorenamelength int var renameInternalNodes bool var renameTips bool +var removeQuotes bool +var addQuotes bool var renameRegex string var renameReplaceBy string @@ -63,6 +65,9 @@ var renameCmd = &cobra.Command{ this will replace all matches of 'Tip(\d+)' with 'Leaf$1', with $1 being the matched string inside (). +* If --add-quotes is specified, then output names will be surrounded by '' + +* If --rm-quotes is specified, starting or ending quotes are removed. Warning: If after this rename, several tips/nodes have the same name, subsequent commands may fail. 
@@ -91,7 +96,12 @@ If --internal is specified, then internal nodes are renamed; return } - if !autorename && !setregex { + if autorename || setregex || removeQuotes || addQuotes { + if autorenamelength < 5 { + autorenamelength = 5 + } + namemap = make(map[string]string) + } else { // Read map file if mapfile == "none" { err = errors.New("map file is not given") @@ -103,11 +113,6 @@ If --internal is specified, then internal nodes are renamed; io.LogError(err) return } - } else { - if autorenamelength < 5 { - autorenamelength = 5 - } - namemap = make(map[string]string) } if f, err = openWriteFile(outtreefile); err != nil { @@ -140,6 +145,16 @@ If --internal is specified, then internal nodes are renamed; io.LogError(err) return } + } else if addQuotes { + if err = tr.Tree.AddQuotes(renameInternalNodes, renameTips, namemap); err != nil { + io.LogError(err) + return + } + } else if removeQuotes { + if err = tr.Tree.RemoveQuotes(renameInternalNodes, renameTips, namemap); err != nil { + io.LogError(err) + return + } } else { if err = tr.Tree.Rename(namemap); err != nil { io.LogError(err) @@ -150,7 +165,7 @@ If --internal is specified, then internal nodes are renamed; f.WriteString(tr.Tree.Newick() + "\n") } - if (autorename || setregex) && mapfile != "none" { + if (autorename || setregex || removeQuotes || addQuotes) && mapfile != "none" { if err = writeNameMap(namemap, mapfile); err != nil { io.LogError(err) return @@ -166,6 +181,8 @@ func init() { renameCmd.Flags().StringVarP(&intreefile, "input", "i", "stdin", "Input tree") renameCmd.Flags().BoolVar(&renameInternalNodes, "internal", false, "Internal nodes are taken into account") renameCmd.Flags().BoolVar(&renameTips, "tips", true, "Tips are taken into account (--tips=false to cancel)") + renameCmd.Flags().BoolVar(&addQuotes, "add-quotes", false, "Add quotes arround tip/node names") + renameCmd.Flags().BoolVar(&removeQuotes, "rm-quotes", false, "Remove quotes arround tip/node names (priority over --rm-quotes)") 
renameCmd.Flags().StringVarP(&mapfile, "map", "m", "none", "Tip name map file") renameCmd.Flags().StringVarP(&renameRegex, "regexp", "e", "none", "Regexp to get matching tip/node names") renameCmd.Flags().StringVarP(&renameReplaceBy, "replace", "b", "none", "String replacement to the given regexp") diff --git a/docs/commands/rename.md b/docs/commands/rename.md index 207b2a4..21c4fd7 100644 --- a/docs/commands/rename.md +++ b/docs/commands/rename.md @@ -5,7 +5,7 @@ ### rename This command renames tips and/or internal nodes of input trees. Several possibilities: -* An existing map file is given (`-m`), and must be tab separated with columns: +* Default: An existing map file is given (`-m`), and must be tab separated with columns: 1) Current name of the tip 2) Desired new name of the tip @@ -13,13 +13,19 @@ This command renames tips and/or internal nodes of input trees. Several possibil - If a tip name does not appear in the map file, it will not be renamed. - If a name that does not exist appears in the map file, it will not throw an error. +Then, by order of priority: + * The `-a` option is given. In this case, tips and/or internal nodes are renamed using automatically generated identifiers of length 10 (or of length `--length`). - Correspondance between old names and new generated names is written in the map file given with `-m`. - In this mode, `--revert` has no effect. - `--length` allows to customize length of generated id. Length is set to 5 if given length is less that 5. - If several trees in input have different tip names, a new identifier is still generated for each tip name that has never been seen (already seen names are associated to the same new name). -* The `-e` (`--regexp`) and `-b` (`--replace`) is given, then it will replace matching strings in tip/node names by string given by `-b`. It takes advantages of the golang regexp machinery, i.e. 
it is possible to specify capturing groups and refering to it in the replacement string, for instance: `gotree rename -i tree.nh --regexp 'Tip(\d+)' --replace 'Leaf$1' -m map.txt` will replace all matches of `Tip(\d+)` with `Leaf$1`, $1 being the matched string inside the capturing group `()`. +* The `-e` (`--regexp`) and `-b` (`--replace`) options are given, then it will replace matching strings in tip/node names by string given by `-b`. It takes advantage of the golang regexp machinery, i.e. it is possible to specify capturing groups and to refer to them in the replacement string, for instance: `gotree rename -i tree.nh --regexp 'Tip(\d+)' --replace 'Leaf$1' -m map.txt` will replace all matches of `Tip(\d+)` with `Leaf$1`, $1 being the matched string inside the capturing group `()`. + +* `--add-quotes` is specified, then output names will be surrounded by single quotes. It replaces starting/ending double quotes by single quotes. + +* `--rm-quotes` is specified, then starting or ending single/double quotes are removed. Other informations: @@ -35,6 +41,7 @@ Usage: gotree rename [flags] Flags: + --add-quotes Add quotes arround tip/node names -a, --auto Renames automatically tips with auto generated id of length 10. 
-h, --help help for rename -i, --input string Input tree (default "stdin") @@ -45,6 +52,7 @@ Flags: -e, --regexp string Regexp to get matching tip/node names (default "none") -b, --replace string String replacement to the given regexp (default "none") -r, --revert Revert orientation of map file + --rm-quotes Remove quotes arround tip/node names (priority over --rm-quotes) --tips Tips are taken into account (--tips=false to cancel) (default true) Global Flags: diff --git a/tree/tree.go b/tree/tree.go index 442f9a3..9d79d4a 100644 --- a/tree/tree.go +++ b/tree/tree.go @@ -1492,6 +1492,84 @@ func (t *Tree) RenameRegexp(internals, tips bool, regex, replace string, namemap return nil } +// RemoveQuotes removes potential single and double quotes surrounding tip and/or internal node names +// +// internals: Remove quotes at internal nodes names +// tips: Remove quotes at tips names +func (t *Tree) RemoveQuotes(internals, tips bool, namemap map[string]string) error { + for _, n := range t.Nodes() { + if (tips && n.Tip()) || (internals && !n.Tip()) { + name := n.Name() + first := name[0] + last := name[len(name)-1] + firstpos := 0 + lastpos := len(name) + if first == '\'' || first == '"' { + firstpos = 1 + } + if last == '\'' || last == '"' { + lastpos = lastpos - 1 + } + newname := name[firstpos:lastpos] + n.SetName(newname) + namemap[name] = newname + } + } + // After we update bitsets if any, and node indexes + //t.ReinitIndexes() + if err := t.UpdateTipIndex(); err != nil { + return err + } + // err := t.ClearBitSets() + // if err != nil { + // return err + // } + // t.UpdateBitSet() + return nil +} + +// AddQuotes adds quotes arround tip and/or internal node names. +// +// If a single quote is already present at the beginning or at the end, it +// leaves it unchanged. 
+// +// If a double quote is already present at the bebinning or at the end, it +// is replaced by a single quote +// +// internals: Adds quotes at internal nodes +// tips: Adds quotes at tips +func (t *Tree) AddQuotes(internals, tips bool, namemap map[string]string) error { + for _, n := range t.Nodes() { + if (tips && n.Tip()) || (internals && !n.Tip()) { + name := n.Name() + first := name[0] + last := name[len(name)-1] + firstpos := 0 + lastpos := len(name) + if first == '\'' || first == '"' { + firstpos = 1 + } + if last == '\'' || last == '"' { + lastpos = lastpos - 1 + } + newname := fmt.Sprintf("'%s'", name[firstpos:lastpos]) + n.SetName(newname) + namemap[name] = newname + } + } + // After we update bitsets if any, and node indexes + //t.ReinitIndexes() + if err := t.UpdateTipIndex(); err != nil { + return err + } + // err := t.ClearBitSets() + // if err != nil { + // return err + // } + // t.UpdateBitSet() + return nil +} + // Clone the given node, copy attributes of the given // node into a new node func (t *Tree) CopyNode(n *Node) *Node { @@ -1559,7 +1637,7 @@ func (t *Tree) copyTreeRecur(copytree *Tree, copynode, node *Node, edge *Edge) { } } -// Assumes that the tree is rooted. +// SubTree Assumes that the tree is rooted. // // Otherwise, will consider the pseudo root // defined by the initial newick file @@ -1576,7 +1654,7 @@ func (t *Tree) SubTree(n *Node) *Tree { return (subtree) } -// Merges Two rooted trees t and t2 in t by adding a new root node with two children +// Merge merges Two rooted trees t and t2 in t by adding a new root node with two children // Corresponding to the roots of the 2 trees. // // If one of the tree is not rooted, returns an error. @@ -1616,7 +1694,7 @@ func (t *Tree) Merge(t2 *Tree) error { return nil } -// Returns the deepest edge of the tree (considered unrooted) +// DeepestEdge returns the deepest edge of the tree (considered unrooted) // in terms of number of tips on the light side of it. 
// // It does not use bitsets, thus they may be uninitialized.