package main
import (
//<<Imports>>
)
//<<Auxillary variables>>
//<<Functions>>
func main() {
//<<Main function>>
}
//<<Set usage>>
//<<Declare and parse options>>
//<<Read the input>>
//<<Find the LCA of the majority of targets>>
if *optC {
//<<Remove the long branches>>
}
u := "fita [options] [tree file] [list of target genome names]"
p := "Find the target clade subtree."
e := "fita -verbose -c tree.nwk targets.txt"
clio.Usage(u, p, e)
"github.com/evolbioinf/clio"
var optC = flag.Bool("c", false, "complete mode" +
"\n(remove most of the neighbors and filter out" +
"\nthe clades containing branches longer than" +
"\nthe automatically calculated threshold)")
var optVerbose = flag.Bool("verbose", false, "verbose mode" +
"\n(the output may be incompatible with piping)")
flag.Parse()
// Read the tree
r := newOsReader(0)
scNwk := nwk.NewScanner(r)
scNwk.Scan()
// Read the target list
r = newOsReader(1)
var targetList []string
scStr := bufio.NewScanner(r)
for scStr.Scan() {
targetList = append(targetList, scStr.Text())
}
"github.com/evolbioinf/nwk"
"bufio"
func newOsReader(x int) io.Reader {
var err error
var r io.Reader
r, err = os.Open(flag.Args()[x])
if err != nil {
panic(err)
}
return r
}
var losLeaves int = 1
var losVotes int = 0
var winVotes int = 0
var winLeaves []string
var round int = 1
//<<Voting initialization (leaves)>>
//<<The voting (leaves)>>
//<<Print the target clade (leaves)>>
var cRoot *nwk.Node = scNwk.Tree()
var rChild *nwk.Node = cRoot.Child
var lChild *nwk.Node = rChild.Sib
var rLeafLabels, lLeafLabels []string
if *optVerbose {
fmt.Printf("# Step 1: find the target clade... #\n\n")
}
for losVotes != losLeaves {
//<<Reset the previous results (leaves)>>
//<<Get leaf labels>>
//<<Count votes (leaves)>>
//<<Choose winner (leaves)>>
if *optVerbose {
//<<Print the results of the round (leaves)>>
}
round++
cRoot = cRoot.Parent
}
rVotes, lVotes = 0, 0
rLeafLabels, lLeafLabels = nil, nil
// the right child
if rChild.Child == nil {
rLeafLabels = append(rLeafLabels, rChild.Label)
} else {
rLeafLabels = getLeafLabels(rChild.Child, rLeafLabels)
}
// the left child
if lChild.Child == nil {
lLeafLabels = append(lLeafLabels, lChild.Label)
} else {
lLeafLabels = getLeafLabels(lChild.Child, lLeafLabels)
}
func getLeafLabels(n *nwk.Node, l []string) []string {
if n == nil {return l}
if n.Child == nil {
l = append(l, n.Label)
}
l = getLeafLabels(n.Child, l)
l = getLeafLabels(n.Sib, l)
return l
}
rVotes = countVotes(rLeafLabels, targetList)
lVotes = countVotes(lLeafLabels, targetList)
func countVotes(arg1 interface{}, arg2 interface{}) int {
votes := 0
switch items := arg1.(type) {
case []string:
//<<Target leaves vote>>
case []float64:
//<<Short branches vote>>
default:
panic("countVotes() failed: unexpected input")
}
return votes
}
crit := arg2.([]string)
for _, leaf := range items {
if isInString(crit, leaf){
votes++
}
}
func isInString(slice []string, value string) bool {
for _, x := range slice {
if x == value {
return true
}
}
return false
}
crit := arg2.(float64)
for _, length := range items {
if length < crit {
votes++
}
}
var rVotes, lVotes int = 0, 0
if rVotes > lVotes {
cRoot = rChild
winVotes = rVotes
winLeaves = rLeafLabels
rChild = cRoot.Child
lChild = rChild.Sib
losVotes = lVotes
losLeaves = len(lLeafLabels)
} else {
cRoot = lChild
winVotes = lVotes
winLeaves = lLeafLabels
rChild = cRoot.Child
lChild = rChild.Sib
losVotes = rVotes
losLeaves = len(rLeafLabels)
}
fmt.Printf(
"round: %d\twinner: %s\tvotes: %d\tleaves: %d\n",
round, cRoot.Label, winVotes, len(winLeaves))
if *optVerbose {
//<<Print the voting results (leaves)>>
}
if !*optC {
fmt.Printf("(%s;\n", cRoot.String()[1:])
}
fmt.Printf(
"final:\t\tone of the clades consists of the targets\n")
fmt.Printf(
"final:\t\treturn to the parent node (%s)\n" +
"\nThe found target clade has %d of %d targets.\n",
cRoot.Label, winVotes, len(targetList))
fmt.Printf("\nThe target clade:\n\n%s;\n", cRoot.String()[1:])
//<<Voting initialization (branches)>>
//<<The voting (branches)>>
//<<Print the target clade (branches)>>
var allLengths []float64
allLengths = getBranchLengths(cRoot, allLengths)
round = 1
//<<Calculate the branch length threshold>>
func getBranchLengths(n *nwk.Node, l []float64) []float64 {
if n == nil {return l}
if n.HasLength {
l = append(l, n.Length)
}
l = getBranchLengths(n.Child, l)
l = getBranchLengths(n.Sib, l)
return l
}
//<<Branch length clustering>>
//<<Retrieve the threshold>>
//<<Prepare the data>>
//<<DBSCAN clustering>>
sort.Float64s(allLengths)
nVal := len(allLengths)
data := make([][]float64, nVal)
// create an empty table
for i := range data {
data[i] = make([]float64, 2)
}
// fill the table with values
for i, v := range allLengths {
data[i][0] = 0
data[i][1] = v
}
//<<Define DBSCAN parameters>>
//<<Create a clusterer>>
//<<Find clusters>>
var minpts int = 1
var eps float64 = (
data[nVal-1][1] - data[0][1]) / float64(nVal)
if *optVerbose {
fmt.Printf("\n# Step 2: removing long branches... #\n" +
"\neps = %2f\n\n", eps)
}
c, err := clusters.DBSCAN(
minpts, eps, 1, clusters.EuclideanDistance)
if err != nil {
panic(err)
}
"github.com/mpraski/clusters"
if err = c.Learn(data); err != nil {
panic(err)
}
if *optVerbose {
fmt.Printf(
"The branches were set into %d clusters:\n\n",
len(c.Sizes()))
// print the table of cluster sizes
fmt.Printf("Cluster id\tCluster size\n")
for i, n := range c.Sizes() {
fmt.Printf("%d\t\t%d\n",
i+1, n)
}
// print the table of cluster mappings
fmt.Printf("\nBranch length\tCluster id\n")
for i, n := range c.Guesses() {
if n == -1 {
fmt.Printf(
"%2f\toutlier\n",
data[i][1])
} else {
fmt.Printf(
"%2f\t%-3d\n",
data[i][1], n)
}
}
}
percentile := int(math.Floor(float64(c.Sizes()[0])*0.95))
thresholdLen := data[percentile][1]
if *optVerbose {
fmt.Printf(
"\nThreshold branch length = %2f\n", thresholdLen)
}
rChild = cRoot.Child
lChild = rChild.Sib
for !thresholdFound {
//<<Reset the previous results (branches)>>
//<<Get branch lengths>>
//<<Count votes (branches)>>
//<<Choose winner (branches)>>
if *optVerbose {
//<<Print the results of the round (branches)>>
}
round++
tr = isInFloat(losLengths, thresholdLen)
tl = isInFloat(losLengths, thresholdLen)
thresholdFound = tr || tl
}
cRoot = cRoot.Parent
var thresholdFound, tr, tl bool
rVotes, lVotes = 0, 0
rbLengths, lbLengths = nil, nil
// the right child
if rChild.Child == nil {
rbLengths = append(rbLengths, rChild.Length)
} else {
rbLengths = getBranchLengths(rChild.Child, rbLengths)
}
// the left child
if lChild.Child == nil {
lbLengths = append(lbLengths, lChild.Length)
} else {
lbLengths = getBranchLengths(lChild.Child, lbLengths)
}
var rbLengths, lbLengths []float64
rVotes = countVotes(rbLengths, thresholdLen)
lVotes = countVotes(lbLengths, thresholdLen)
if rVotes > lVotes {
cRoot = rChild
losVotes = lVotes
losLengths = lbLengths
winVotes = rVotes
rChild = cRoot.Child
lChild = rChild.Sib
} else {
cRoot = lChild
losVotes = rVotes
losLengths = rbLengths
winVotes = lVotes
rChild = cRoot.Child
lChild = rChild.Sib
}
func isInFloat(slice []float64, value float64) bool {
for _, x := range slice {
if x == value {
return true
}
}
return false
}
fmt.Printf(
"round: %d\twinner: %s\tvotes: %d\n",
round, cRoot.Label, winVotes)
if *optVerbose {
//<<Print the voting results (branches)>>
}
fmt.Printf("(%s;\n", cRoot.String()[1:])
fmt.Printf("final:\t\tthe threshold branch length reached\n" +
"final:\t\treturn to the parent node (%s)\n",
cRoot.Label)
fmt.Printf("\nThe target clade:\n\n")