Skip to content

Latest commit

 

History

History
965 lines (835 loc) · 28.4 KB

fita.org

File metadata and controls

965 lines (835 loc) · 28.4 KB
package main

import (
	  //<<Imports>>
)
//<<Auxillary variables>>
//<<Functions>>
func main() {
	  //<<Main function>>
}
//<<Set usage>>
//<<Declare and parse options>>
//<<Read the input>>
//<<Find the LCA of the majority of targets>>
if *optC {
	  //<<Remove the long branches>>
}
u := "fita [options] [tree file] [list of target genome names]"
p := "Find the target clade subtree."
e := "fita -verbose -c tree.nwk targets.txt"
clio.Usage(u, p, e)
"github.com/evolbioinf/clio"
var optC = flag.Bool("c", false, "complete mode" +
	  "\n(remove most of the neighbors and filter out" +
	  "\nthe clades containing branches longer than" +
	  "\nthe automatically calculated threshold)")
var optVerbose = flag.Bool("verbose", false, "verbose mode" +
	  "\n(the output may be incompatible with piping)")
flag.Parse()
"flag"
  // Read the tree
r := newOsReader(0)
scNwk := nwk.NewScanner(r)
scNwk.Scan()
  // Read the target list
r = newOsReader(1)
var targetList []string
scStr := bufio.NewScanner(r)
for scStr.Scan() {
	  targetList = append(targetList, scStr.Text())
}
"github.com/evolbioinf/nwk"
"bufio"
func newOsReader(x int) io.Reader {
	  var err error
	  var r io.Reader
	  r, err = os.Open(flag.Args()[x])
	  if err != nil {
		  panic(err)
	  }
	  return r
}
"io"
"os"
var losLeaves int = 1
var losVotes int = 0
var winVotes int = 0
var winLeaves []string
var round int = 1
//<<Voting initialization (leaves)>>
//<<The voting (leaves)>>
//<<Print the target clade (leaves)>>
var cRoot *nwk.Node = scNwk.Tree()
var rChild *nwk.Node = cRoot.Child
var lChild *nwk.Node = rChild.Sib
var rLeafLabels, lLeafLabels []string
if *optVerbose {
	  fmt.Printf("# Step 1: find the target clade... #\n\n")
}

for losVotes != losLeaves {
	  //<<Reset the previous results (leaves)>>
	  //<<Get leaf labels>>
	  //<<Count votes (leaves)>>
	  //<<Choose winner (leaves)>>
	  if *optVerbose {
		  //<<Print the results of the round (leaves)>>
	  }
	  round++
	  cRoot = cRoot.Parent
}
rVotes, lVotes = 0, 0
rLeafLabels, lLeafLabels = nil, nil
  // the right child
if rChild.Child == nil {
	  rLeafLabels = append(rLeafLabels, rChild.Label)
} else {
	  rLeafLabels = getLeafLabels(rChild.Child, rLeafLabels)
}
  // the left child
if lChild.Child == nil {
	  lLeafLabels = append(lLeafLabels, lChild.Label)
} else {
	  lLeafLabels = getLeafLabels(lChild.Child, lLeafLabels)
}
func getLeafLabels(n *nwk.Node, l []string) []string {
	  if n == nil {return l}
	  if n.Child == nil {
		  l = append(l, n.Label)
	  }
	  l = getLeafLabels(n.Child, l)
	  l = getLeafLabels(n.Sib, l)
	  return l
}
rVotes = countVotes(rLeafLabels, targetList)
lVotes = countVotes(lLeafLabels, targetList)
func countVotes(arg1 interface{}, arg2 interface{}) int {
	  votes := 0
	  switch items := arg1.(type) {
	  case []string:
		  //<<Target leaves vote>>
	  case []float64:
		  //<<Short branches vote>>
	  default: 
		  panic("countVotes() failed: unexpected input")
	  }
	  return votes
}
crit := arg2.([]string)
for _, leaf := range items {
	  if isInString(crit, leaf){
		  votes++
	  }
}
func isInString(slice []string, value string) bool {
	  for _, x := range slice {
		  if x == value {
			  return true
		  }
	  }
	  return false
}
crit := arg2.(float64)
for _, length := range items {
	  if length < crit {
		  votes++
	  }	
}
var rVotes, lVotes int = 0, 0
if rVotes > lVotes {
	  cRoot = rChild
	  winVotes = rVotes
	  winLeaves = rLeafLabels
	  rChild = cRoot.Child
	  lChild = rChild.Sib
	  losVotes = lVotes
	  losLeaves = len(lLeafLabels)
} else {
	  cRoot = lChild
	  winVotes = lVotes
	  winLeaves = lLeafLabels
	  rChild = cRoot.Child
	  lChild = rChild.Sib
	  losVotes = rVotes
	  losLeaves = len(rLeafLabels)
}
fmt.Printf(
	  "round: %d\twinner: %s\tvotes: %d\tleaves: %d\n",
	  round, cRoot.Label, winVotes, len(winLeaves))
if *optVerbose {
	  //<<Print the voting results (leaves)>>
}
if !*optC {
	  fmt.Printf("(%s;\n", cRoot.String()[1:])
}
"fmt"
fmt.Printf(
	  "final:\t\tone of the clades consists of the targets\n")
fmt.Printf(
	  "final:\t\treturn to the parent node (%s)\n" +
	  "\nThe found target clade has %d of %d targets.\n",
	  cRoot.Label, winVotes, len(targetList))
fmt.Printf("\nThe target clade:\n\n%s;\n", cRoot.String()[1:])	
//<<Voting initialization (branches)>>
//<<The voting (branches)>>
//<<Print the target clade (branches)>>
var allLengths []float64
allLengths = getBranchLengths(cRoot, allLengths)
round = 1
//<<Calculate the branch length threshold>>
func getBranchLengths(n *nwk.Node, l []float64) []float64 {
	  if n == nil {return l}
	  if n.HasLength {
		  l = append(l, n.Length)
	  }
	  l = getBranchLengths(n.Child, l)
	  l = getBranchLengths(n.Sib, l)
	  return l
}
//<<Branch length clustering>>
//<<Retrieve the threshold>>
//<<Prepare the data>>
//<<DBSCAN clustering>>
sort.Float64s(allLengths)
nVal := len(allLengths)
data := make([][]float64, nVal)
  // create an empty table
for i := range data {
	  data[i] = make([]float64, 2)
}
  // fill the table with values
for i, v := range allLengths {
	  data[i][0] = 0
	  data[i][1] = v
}
"sort"
//<<Define DBSCAN parameters>>
//<<Create a clusterer>>
//<<Find clusters>>
var minpts int = 1
var eps float64 = (
	  data[nVal-1][1] - data[0][1]) / float64(nVal)
if *optVerbose {
	  fmt.Printf("\n# Step 2: removing long branches... #\n" +
		     "\neps = %2f\n\n", eps)
}
c, err := clusters.DBSCAN(
	  minpts, eps, 1, clusters.EuclideanDistance)
if err != nil {
	  panic(err)
}
"github.com/mpraski/clusters"
if err = c.Learn(data); err != nil {
	  panic(err)
}

if *optVerbose {
	  fmt.Printf(
		  "The branches were set into %d clusters:\n\n",
		  len(c.Sizes()))
	    // print the table of cluster sizes
	  fmt.Printf("Cluster id\tCluster size\n")
	  for i, n := range c.Sizes() {
		  fmt.Printf("%d\t\t%d\n",
			  i+1, n)
	  }
	    // print the table of cluster mappings
	  fmt.Printf("\nBranch length\tCluster id\n")
	  for i, n := range c.Guesses() {
		  if n == -1 {
			  fmt.Printf(
				  "%2f\toutlier\n",
				  data[i][1])
		  } else {
			  fmt.Printf(
				  "%2f\t%-3d\n",
				  data[i][1], n)
		  }
	  }
}
percentile := int(math.Floor(float64(c.Sizes()[0])*0.95))
thresholdLen := data[percentile][1]

if *optVerbose {
	  fmt.Printf(
		  "\nThreshold branch length = %2f\n", thresholdLen)
}
"math"
rChild = cRoot.Child
lChild = rChild.Sib
for !thresholdFound {
	  //<<Reset the previous results (branches)>>
	  //<<Get branch lengths>>
	  //<<Count votes (branches)>>
	  //<<Choose winner (branches)>>
	  if *optVerbose {
		  //<<Print the results of the round (branches)>>
	  }
	  round++
	  tr = isInFloat(losLengths, thresholdLen)
	  tl = isInFloat(losLengths, thresholdLen)
	  thresholdFound = tr || tl
}
cRoot = cRoot.Parent
var thresholdFound, tr, tl bool
rVotes, lVotes = 0, 0
rbLengths, lbLengths = nil, nil
  // the right child
if rChild.Child == nil {
	  rbLengths = append(rbLengths, rChild.Length)
  } else {
	  rbLengths = getBranchLengths(rChild.Child, rbLengths)
  }
  // the left child
if lChild.Child == nil {
	  lbLengths = append(lbLengths, lChild.Length)
  } else {
	  lbLengths = getBranchLengths(lChild.Child, lbLengths)
  }
var rbLengths, lbLengths []float64
rVotes = countVotes(rbLengths, thresholdLen)
lVotes = countVotes(lbLengths, thresholdLen)
if rVotes > lVotes {
	  cRoot = rChild
	  losVotes = lVotes
	  losLengths = lbLengths
	  winVotes = rVotes
	  rChild = cRoot.Child
	  lChild = rChild.Sib
} else {
	  cRoot = lChild
	  losVotes = rVotes
	  losLengths = rbLengths
	  winVotes = lVotes
	  rChild = cRoot.Child
	  lChild = rChild.Sib
}
var losLengths []float64
func isInFloat(slice []float64, value float64) bool {
	  for _, x := range slice {
		  if x == value {
			  return true
		  }
	  }
	  return false
}
fmt.Printf(
	  "round: %d\twinner: %s\tvotes: %d\n",
	  round, cRoot.Label, winVotes)
if *optVerbose {
	  //<<Print the voting results (branches)>>
}
fmt.Printf("(%s;\n", cRoot.String()[1:])
fmt.Printf("final:\t\tthe threshold branch length reached\n" +
	  "final:\t\treturn to the parent node (%s)\n",
	  cRoot.Label)

fmt.Printf("\nThe target clade:\n\n")