Skip to content

Commit

Permalink
Added options --rm-quotes and --add-quotes to gotree rename #9
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Aug 24, 2020
1 parent a65d200 commit c525fd7
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 12 deletions.
31 changes: 24 additions & 7 deletions cmd/rename.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ var autorename bool
var autorenamelength int
var renameInternalNodes bool
var renameTips bool
var removeQuotes bool
var addQuotes bool
var renameRegex string
var renameReplaceBy string

Expand Down Expand Up @@ -63,6 +65,9 @@ var renameCmd = &cobra.Command{
this will replace all matches of 'Tip(\d+)' with 'Leaf$1', with $1 being the matched string
inside ().
* If --add-quotes is specified, then output names will be surrounded by ''
* If --rm-quotes is specified, starting or ending quotes are removed.
Warning: If after this rename, several tips/nodes have the same name, subsequent commands may
fail.
Expand Down Expand Up @@ -91,7 +96,12 @@ If --internal is specified, then internal nodes are renamed;
return
}

if !autorename && !setregex {
if autorename || setregex || removeQuotes || addQuotes {
if autorenamelength < 5 {
autorenamelength = 5
}
namemap = make(map[string]string)
} else {
// Read map file
if mapfile == "none" {
err = errors.New("map file is not given")
Expand All @@ -103,11 +113,6 @@ If --internal is specified, then internal nodes are renamed;
io.LogError(err)
return
}
} else {
if autorenamelength < 5 {
autorenamelength = 5
}
namemap = make(map[string]string)
}

if f, err = openWriteFile(outtreefile); err != nil {
Expand Down Expand Up @@ -140,6 +145,16 @@ If --internal is specified, then internal nodes are renamed;
io.LogError(err)
return
}
} else if addQuotes {
if err = tr.Tree.AddQuotes(renameInternalNodes, renameTips, namemap); err != nil {
io.LogError(err)
return
}
} else if removeQuotes {
if err = tr.Tree.RemoveQuotes(renameInternalNodes, renameTips, namemap); err != nil {
io.LogError(err)
return
}
} else {
if err = tr.Tree.Rename(namemap); err != nil {
io.LogError(err)
Expand All @@ -150,7 +165,7 @@ If --internal is specified, then internal nodes are renamed;
f.WriteString(tr.Tree.Newick() + "\n")
}

if (autorename || setregex) && mapfile != "none" {
if (autorename || setregex || removeQuotes || addQuotes) && mapfile != "none" {
if err = writeNameMap(namemap, mapfile); err != nil {
io.LogError(err)
return
Expand All @@ -166,6 +181,8 @@ func init() {
renameCmd.Flags().StringVarP(&intreefile, "input", "i", "stdin", "Input tree")
renameCmd.Flags().BoolVar(&renameInternalNodes, "internal", false, "Internal nodes are taken into account")
renameCmd.Flags().BoolVar(&renameTips, "tips", true, "Tips are taken into account (--tips=false to cancel)")
renameCmd.Flags().BoolVar(&addQuotes, "add-quotes", false, "Add quotes arround tip/node names")
renameCmd.Flags().BoolVar(&removeQuotes, "rm-quotes", false, "Remove quotes arround tip/node names (priority over --rm-quotes)")
renameCmd.Flags().StringVarP(&mapfile, "map", "m", "none", "Tip name map file")
renameCmd.Flags().StringVarP(&renameRegex, "regexp", "e", "none", "Regexp to get matching tip/node names")
renameCmd.Flags().StringVarP(&renameReplaceBy, "replace", "b", "none", "String replacement to the given regexp")
Expand Down
12 changes: 10 additions & 2 deletions docs/commands/rename.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,27 @@
### rename
This command renames tips and/or internal nodes of input trees. Several possibilities:

* An existing map file is given (`-m`), and must be tab separated with columns:
* Default: An existing map file is given (`-m`), and must be tab separated with columns:
1) Current name of the tip
2) Desired new name of the tip

- If `--revert` is specified then it is the other way
- If a tip name does not appear in the map file, it will not be renamed.
- If a name that does not exist appears in the map file, it will not throw an error.

Then, by order of priority:

* The `-a` option is given. In this case, tips and/or internal nodes are renamed using automatically generated identifiers of length 10 (or of length `--length`).
- Correspondence between old names and new generated names is written in the map file given with `-m`.
- In this mode, `--revert` has no effect.
- `--length` allows customizing the length of the generated id. Length is set to 5 if the given length is less than 5.
- If several trees in input have different tip names, a new identifier is still generated for each tip name that has never been seen (already seen names are associated to the same new name).

* The `-e` (`--regexp`) and `-b` (`--replace`) is given, then it will replace matching strings in tip/node names by string given by `-b`. It takes advantages of the golang regexp machinery, i.e. it is possible to specify capturing groups and refering to it in the replacement string, for instance: `gotree rename -i tree.nh --regexp 'Tip(\d+)' --replace 'Leaf$1' -m map.txt` will replace all matches of `Tip(\d+)` with `Leaf$1`, $1 being the matched string inside the capturing group `()`.
* The `-e` (`--regexp`) and `-b` (`--replace`) options are given, then it will replace matching strings in tip/node names by the string given by `-b`. It takes advantage of the golang regexp machinery, i.e. it is possible to specify capturing groups and refer to them in the replacement string, for instance: `gotree rename -i tree.nh --regexp 'Tip(\d+)' --replace 'Leaf$1' -m map.txt` will replace all matches of `Tip(\d+)` with `Leaf$1`, $1 being the matched string inside the capturing group `()`.

* `--add-quotes` is specified, then output names will be surrounded by single quotes. It replaces starting/ending double quotes by single quotes.

* `--rm-quotes` is specified, then starting or ending single/double quotes are removed.


Other information:
Expand All @@ -35,6 +41,7 @@ Usage:
gotree rename [flags]
Flags:
--add-quotes Add quotes arround tip/node names
-a, --auto Renames automatically tips with auto generated id of length 10.
-h, --help help for rename
-i, --input string Input tree (default "stdin")
Expand All @@ -45,6 +52,7 @@ Flags:
-e, --regexp string Regexp to get matching tip/node names (default "none")
-b, --replace string String replacement to the given regexp (default "none")
-r, --revert Revert orientation of map file
--rm-quotes Remove quotes arround tip/node names (priority over --rm-quotes)
--tips Tips are taken into account (--tips=false to cancel) (default true)
Global Flags:
Expand Down
84 changes: 81 additions & 3 deletions tree/tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -1492,6 +1492,84 @@ func (t *Tree) RenameRegexp(internals, tips bool, regex, replace string, namemap
return nil
}

// RemoveQuotes removes at most one leading and one trailing quote character
// (single ' or double ") from tip and/or internal node names.
//
// internals: Remove quotes at internal nodes names
// tips: Remove quotes at tips names
// namemap: if non-nil, receives an "original name -> new name" entry for
// every processed node (including nodes whose name did not change)
//
// Nodes with an empty name (e.g. unnamed internal nodes) are skipped: there
// is nothing to strip, and indexing into an empty name would panic.
//
// Returns the error reported by UpdateTipIndex, if any.
func (t *Tree) RemoveQuotes(internals, tips bool, namemap map[string]string) error {
	for _, n := range t.Nodes() {
		if !((tips && n.Tip()) || (internals && !n.Tip())) {
			continue
		}
		name := n.Name()
		if len(name) == 0 {
			// Unnamed node: nothing to remove
			continue
		}
		firstpos := 0
		lastpos := len(name)
		if c := name[0]; c == '\'' || c == '"' {
			firstpos = 1
		}
		// Only look at the last character if it is not the one already
		// consumed as the leading quote (name made of a single quote
		// character), otherwise the slice bounds below would be inverted.
		if lastpos > firstpos {
			if c := name[lastpos-1]; c == '\'' || c == '"' {
				lastpos--
			}
		}
		newname := name[firstpos:lastpos]
		n.SetName(newname)
		if namemap != nil {
			namemap[name] = newname
		}
	}
	// After we update bitsets if any, and node indexes
	//t.ReinitIndexes()
	if err := t.UpdateTipIndex(); err != nil {
		return err
	}
	// err := t.ClearBitSets()
	// if err != nil {
	// 	return err
	// }
	// t.UpdateBitSet()
	return nil
}

// AddQuotes surrounds tip and/or internal node names with single quotes.
//
// If a single or double quote is already present at the beginning and/or at
// the end of a name, it is first stripped (at most one character on each
// side), so that an already single-quoted name is left unchanged and
// surrounding double quotes are replaced by single quotes.
//
// internals: Adds quotes at internal nodes
// tips: Adds quotes at tips
// namemap: if non-nil, receives an "original name -> new name" entry for
// every processed node
//
// Nodes with an empty name (e.g. unnamed internal nodes) are skipped, so that
// no label is created where there was none; indexing into an empty name
// would also panic.
//
// Returns the error reported by UpdateTipIndex, if any.
func (t *Tree) AddQuotes(internals, tips bool, namemap map[string]string) error {
	for _, n := range t.Nodes() {
		if !((tips && n.Tip()) || (internals && !n.Tip())) {
			continue
		}
		name := n.Name()
		if len(name) == 0 {
			// Unnamed node: leave it unnamed
			continue
		}
		firstpos := 0
		lastpos := len(name)
		if c := name[0]; c == '\'' || c == '"' {
			firstpos = 1
		}
		// Only look at the last character if it is not the one already
		// consumed as the leading quote (name made of a single quote
		// character), otherwise the slice bounds below would be inverted.
		if lastpos > firstpos {
			if c := name[lastpos-1]; c == '\'' || c == '"' {
				lastpos--
			}
		}
		newname := fmt.Sprintf("'%s'", name[firstpos:lastpos])
		n.SetName(newname)
		if namemap != nil {
			namemap[name] = newname
		}
	}
	// After we update bitsets if any, and node indexes
	//t.ReinitIndexes()
	if err := t.UpdateTipIndex(); err != nil {
		return err
	}
	// err := t.ClearBitSets()
	// if err != nil {
	// 	return err
	// }
	// t.UpdateBitSet()
	return nil
}

// Clone the given node, copy attributes of the given
// node into a new node
func (t *Tree) CopyNode(n *Node) *Node {
Expand Down Expand Up @@ -1559,7 +1637,7 @@ func (t *Tree) copyTreeRecur(copytree *Tree, copynode, node *Node, edge *Edge) {
}
}

// Assumes that the tree is rooted.
// SubTree Assumes that the tree is rooted.
//
// Otherwise, will consider the pseudo root
// defined by the initial newick file
Expand All @@ -1576,7 +1654,7 @@ func (t *Tree) SubTree(n *Node) *Tree {
return (subtree)
}

// Merges Two rooted trees t and t2 in t by adding a new root node with two children
// Merge merges Two rooted trees t and t2 in t by adding a new root node with two children
// Corresponding to the roots of the 2 trees.
//
// If one of the tree is not rooted, returns an error.
Expand Down Expand Up @@ -1616,7 +1694,7 @@ func (t *Tree) Merge(t2 *Tree) error {
return nil
}

// Returns the deepest edge of the tree (considered unrooted)
// DeepestEdge returns the deepest edge of the tree (considered unrooted)
// in terms of number of tips on the light side of it.
//
// It does not use bitsets, thus they may be uninitialized.
Expand Down

0 comments on commit c525fd7

Please sign in to comment.