-
Notifications
You must be signed in to change notification settings - Fork 0
/
plotcount.py
executable file
·112 lines (95 loc) · 3.53 KB
/
plotcount.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env python
import sys

try:
    # Python 3.3+; the alias in ``collections`` was removed in Python 3.10.
    from collections.abc import Sequence
except ImportError:
    # Python 2 fallback.
    from collections import Sequence

import matplotlib

matplotlib.use("AGG")  # Must be selected before pyplot is imported.
import matplotlib.pyplot as plt
import numpy as np

from wordcount import load_word_counts
def plot_word_counts(counts, limit=10):
    """
    Plot word counts as a bar chart on a new pyplot figure.

    Given a list of (word, count, percentage) tuples, plot the counts as a
    histogram. Only the first limit tuples are plotted. Each bar is labelled
    with its word. The figure is created with ``plt.figure()`` and nothing is
    returned; callers show or save it through ``plt``.
    """
    # Calculate plot values
    limited_counts = counts[0:limit]
    word_data = [word for (word, _, _) in limited_counts]
    count_data = [count for (_, count, _) in limited_counts]
    position = np.arange(len(word_data))
    width = 1.0
    # Create the plot
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_title("Word Counts")
    # Ticks are placed half a bar to the right of each position, i.e. at the
    # middle of a bar whose LEFT edge is at that position.
    ax.set_xticks(position + (width / 2))
    ax.set_xticklabels(word_data)
    # align="edge" anchors each bar by its left edge so the ticks above land
    # on the bar centres. Matplotlib 2.0+ centres bars by default, which would
    # leave every label shifted half a bar to the right.
    ax.bar(position, count_data, width, color="b", align="edge")
def typeset_labels(labels=None, gap=5):
    """
    Pad labels to a common width.

    Given a list of labels, create a new list of labels such that each label
    is right-padded by spaces so that every label has the same width, then
    is further right padded by ' ' * gap.

    labels may be:
      * a sequence of objects, each converted to text with str();
      * a non-sequence value n accepted by range(), standing for the labels
        "0" .. str(n - 1);
      * None or an empty sequence, which yields an empty list.

    Note that a str is itself a sequence, so a bare string is split into one
    label per character.

    Returns a list of equally long strings, in the same order as labels.
    """
    if labels is None:
        # The default argument means "no labels" rather than a crash inside
        # range(None).
        return []
    if not isinstance(labels, Sequence):
        labels = range(labels)
    labels = [str(i) for i in labels]
    if not labels:
        # max() of an empty list would raise ValueError.
        return []
    label_width = max(len(s) for s in labels)
    trailer = " " * gap
    # ljust pads every label up to the widest one, so all results share the
    # same length by construction.
    return [label.ljust(label_width) + trailer for label in labels]
def get_ascii_bars(values, truncate=True, maxlen=10, symbol="#"):
    """
    Turn a list of numbers into a list of bar strings, one per value.

    Each bar is a prefix of the longest bar, which is symbol repeated
    round(maxlen / len(symbol)) times. A bar contains N characters, where

        N = round(((value - minimum) / (maximum - minimum)) * len(longest bar))

    maximum is max(values). minimum is min(values) - 1 when truncate is true
    (so the smallest value still gets a visible bar) and 0 otherwise.

    An empty values list yields an empty list. When maximum equals minimum
    (e.g. every value is 0 and truncate is false) every bar is empty rather
    than dividing by zero. symbol must be a non-empty string.
    """
    if len(values) == 0:
        # max()/min() of an empty sequence would raise ValueError.
        return []
    maximum = max(values)
    if truncate:
        minimum = min(values) - 1
    else:
        minimum = 0
    # Type conversion to floats is required for compatibility with python 2,
    # because it does floor division for integers.
    value_range = float(maximum - minimum)
    if value_range == 0:
        # No spread to scale against; draw nothing instead of dividing by 0.
        prop_values = [0.0] * len(values)
    else:
        prop_values = [float(value - minimum) / value_range for value in values]
    # Type conversion to int required for compatibility with python 2, where
    # round() returns a float.
    biggest_bar = symbol * int(round(maxlen / len(symbol)))
    full_length = len(biggest_bar)
    return [biggest_bar[: int(round(prop * full_length))] for prop in prop_values]
def plot_ascii_bars(values, labels=None, screenwidth=80, gap=2, truncate=True):
    """
    Build the lines of an ASCII bar chart.

    Given a list of values and labels, create right-padded labels for each
    label and strings of symbols representing the associated values.

    When labels is empty or None, values is first treated as a list of
    (value, label) pairs and unzipped. If that fails because the items are
    not iterable, the labels default to the indices "0", "1", ...

    screenwidth and gap determine the bar length: the longest bar is
    screenwidth - gap - (width of a padded label) characters, where the
    padded label already ends with gap spaces. truncate is forwarded to
    get_ascii_bars.

    Returns a list of strings, each a padded label followed by its bar. An
    empty values list yields an empty list.
    """
    if len(values) == 0:
        # Nothing to draw; also avoids unpacking or indexing an empty list.
        return []
    if not labels:
        try:
            values, labels = list(zip(*values))
        except TypeError:
            # Plain numbers cannot be unzipped; label the bars by index.
            labels = len(values)
    labels = typeset_labels(labels=labels, gap=gap)
    bars = get_ascii_bars(
        values, maxlen=screenwidth - gap - len(labels[0]), truncate=truncate
    )
    return [s + b for s, b in zip(labels, bars)]
if __name__ == "__main__":
    # Command line: INPUT_FILE OUTPUT [LIMIT]
    #   INPUT_FILE  file passed to load_word_counts
    #   OUTPUT      "show" to call plt.show(), "ascii" to print text bars to
    #               stdout, anything else is a file name for plt.savefig
    #   LIMIT       optional number of words to plot (default 10)
    if len(sys.argv) < 3:
        # Fail with a usage hint instead of a bare IndexError traceback.
        sys.exit(
            "Usage: {} INPUT_FILE (OUTPUT_FILE | show | ascii) [LIMIT]".format(
                sys.argv[0]
            )
        )
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    limit = 10
    # NOTE(review): this status line goes to stdout, so it also precedes the
    # chart in "ascii" mode -- confirm whether that is intended.
    print("I am running")
    if len(sys.argv) > 3:
        limit = int(sys.argv[3])
    counts = load_word_counts(input_file)
    if output_file == "ascii":
        # The text chart does not use matplotlib, so no figure is built here.
        words, word_totals, _ = list(zip(*counts))
        for line in plot_ascii_bars(
            word_totals[:limit], words[:limit], truncate=False
        ):
            print(line)
    else:
        plot_word_counts(counts, limit)
        if output_file == "show":
            plt.show()
        else:
            plt.savefig(output_file)