-
Notifications
You must be signed in to change notification settings - Fork 1
/
BA7E
133 lines (113 loc) · 5.43 KB
/
BA7E
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import numpy as np
# Define a class for the Neighbor-Joining algorithm
class NeighborJoining:
# Constructor that takes the number of taxa (n) and the distance matrix (distMatrix)
def _init_(self, n, distMatrix):
# Call the runNeighborJoining method to construct the phylogenetic tree
adjacencies = self.runNeighborJoining(distMatrix, n)
# Print the resulting tree
self.printTree(adjacencies)
# Method to read input from the command line
def readFromCommandLine(self):
# Prompt the user for the number of taxa (n)
n = int(input("Enter the number of taxa: "))
# Initialize an empty list to store the distance matrix
distMatrix = []
# Loop to read the distance matrix row by row
for _ in range(n):
# Prompt the user for a row of distances as space-separated values
row = list(map(int, input().split()))
# Append the row to the distance matrix
distMatrix.append(row)
# Return the number of taxa and the distance matrix
return n, distMatrix
# Method to save the result to a file
def saveResultToFile(self, adjacencies):
# Open a file named 'result.txt' for writing
with open('result.txt', 'w') as file:
# Loop through the adjacency list and write each edge to the file
for i, nodes in enumerate(adjacencies):
for neighbor, weight in nodes:
# Write the edge information to the file
file.write(f"{i}->{neighbor}:{weight:.3f}\n")
# Method to print the distance matrix to the console
def printDistanceMatrix(self, distMatrix):
# Loop through the rows of the distance matrix and print each row
for row in distMatrix:
# Join the elements of the row into a space-separated string and print it
print(' '.join(map(str, row)))
# Method to print the phylogenetic tree to the console
def printTree(self, adjacencies):
# Loop through the nodes in the adjacency list and print each edge
for i, nodes in enumerate(adjacencies):
for neighbor, weight in nodes:
# Print the edge information
print(f"{i}->{neighbor}:{weight:.3f}")
# Method to run the Neighbor-Joining algorithm
def runNeighborJoining(self, distMatrix, n):
# Convert the distance matrix to a NumPy array for efficient computations
D = np.array(distMatrix, dtype=float)
# Initialize a list to keep track of clusters
clusters = [i for i in range(n)]
# Initialize an adjacency list to store the edges of the phylogenetic tree
adjacencies = [[] for i in range(n)]
# Check if there are fewer than or equal to 1 taxa
if len(D) <= 1:
return adjacencies
# Main loop of the Neighbor-Joining algorithm
while True:
# Check if there are only 2 taxa remaining to join
if n == 2:
# Connect the last two taxa with their distance as the branch length
adjacencies[len(adjacencies) - 1].append((len(adjacencies) - 2, D[0][1]))
adjacencies[len(adjacencies) - 2].append((len(adjacencies) - 1, D[0][1]))
break
# Calculate the total genetic distance for each taxon
totalDistances = np.sum(D, axis=0)
# Initialize a modified distance matrix
D1 = (n - 2) * D
D1 = D1 - totalDistances
D1 = D1 - totalDistances.reshape((n, 1))
np.fill_diagonal(D1, 0.)
# Find the minimum entry in the modified distance matrix
minIndex = np.argmin(D1)
i = minIndex // n
j = minIndex % n
# Calculate delta and branch lengths for the new cluster
delta = (totalDistances[i] - totalDistances[j]) / (n - 2)
li = (D[i, j] + delta) / 2
lj = (D[i, j] - delta) / 2
# Calculate the distances for the new cluster
newDistances = (D[i, :] + D[j, :] - D[i, j]) / 2
# Expand and update the distance matrix
D = np.insert(D, n, newDistances, axis=0)
newDistances = np.insert(newDistances, n, 0., axis=0)
D = np.insert(D, n, newDistances, axis=1)
D = np.delete(D, [i, j], 0)
D = np.delete(D, [i, j], 1)
# Add the new cluster to the adjacency list
m = len(adjacencies)
adjacencies.append([])
adjacencies[m].append((clusters[i], li))
adjacencies[clusters[i]].append((m, li))
adjacencies[m].append((clusters[j], lj))
adjacencies[clusters[j]].append((m, lj))
# Update the cluster list by removing the joined clusters
if i < j:
del clusters[j]
del clusters[i]
else:
del clusters[i]
del clusters[j]
# Add the new cluster to the cluster list
clusters.append(m)
# Decrement the number of taxa
n -= 1
# Return the adjacency list representing the phylogenetic tree
return adjacencies
# Entry point of the program
if _name_ == "_main_":
# Create an instance of the NeighborJoining class and read input from the command line
n, distMatrix = NeighborJoining(0, []).readFromCommandLine()
# Construct the phylogenetic tree and print it
NeighborJoining(n, distMatrix)