-
Notifications
You must be signed in to change notification settings - Fork 1
/
test_c_u_curve.m
157 lines (133 loc) · 9.38 KB
/
test_c_u_curve.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
% Uwe Ehret, 2022/11/01
% This provides test application for the c-u-curve method
% Required Matlab products: Matlab 9.9
% For further explanations, please see the related publication below. Please cite this publication when applying the method.
% Ehret, U., and Dey, P.: Technical note: c-u-curve: A method to analyse, classify and compare dynamical systems by uncertainty and complexity, Hydrol. Earth Syst. Sci. Discuss., 2022, 1-12, 10.5194/hess-2022-16, 2022.
clearvars
clear all
close all
clc
%% Test data set: 1-d, deterministic
% settings
nt = 4096; % number of rows (=time steps) in the data set
ndim = 1; % number of colums (=variables) in the data set
nens = 1; % number of ensemble members in the data set
nvb = 10; % [1,ndim] array with number of equal-size bins for the value range of the data values of each variable
neb = 12; % number of equal-size bins for the value range of entropies
vals_min = 0; % minimum value (used to re-scale data values)
vals_max = 1; % maximum value (used to re-scale data values)
% array with all time slice widths to be examined
% - size is [nss,1], with nss being total number of time slicing schemes to be examined
% - order is ascending, minimum possible value is 1, maximum possible value is nt
slice_widths = [1 2 4 8 16 32 64 128 256 512 1024 2048 nt]';
% create test data set (random normal)
dummy = 2*randn(nt,1); % create data set
vals = rescale(dummy,vals_min,vals_max); % normalize to [0,1] range for convenient binning
% create edges of value bins
% - [1,ndim] cell array, with a [1,nvb+1] array of bin edges for each variable
edges_vals = cell(1,ndim);
edges_vals{1} = linspace(vals_min,vals_max,nvb+1);
% create edges of entropy bins
% - [1,1] cell array, with a [1,neb+1] array of bin edges for the entropy values
% - the possible value range for entropy values is always [0,log2(total number of value bins for the entire ndim-dimensional space of values)]
tot_nvb = nvb; % total number of value bins for the entire ndim-dimensional space of values
edges_entropy = cell(1,1);
edges_entropy{1} = linspace(0,log2(tot_nvb),neb+1);
% calculate uncertainty and complexity
[uncs,comps,ns,all_uncs] = f_c_u_curve(vals, edges_vals, edges_entropy, slice_widths,0);
% calculate maximum and minimum possible uncertainty and complexity
% - this is useful to put actual uncertainties and complexities into perspective
% - maximum possible uncertainty is reached if the entire ndim-dimensional space of values
% is filled by a uniform distribution, whose entropy is log2(total number of value bins for the entire ndim-dimensional space of values)
% - maximum possible complexity is reached if the entropies of all time slices of a time slicing scheme are uniformly distributed.
% In that case, entropy is log2(number of entropy bins).
% - minimum possible value both for uncertainty and complexity is zero
tot_nvb = prod(cellfun(@length,edges_vals)-1); % total number of value bins for the entire ndim-dimensional space of values
max_possible_unc = log2(tot_nvb); % maximum possible uncertainty
max_possible_comp = log2(neb); % maximum posible complexity
min_possible_unc = 0; % minimum possible uncertainty
min_possible_comp = 0; % minimum posible complexity
%% Test data set: 2-d, deterministic
% settings
nt = 4096; % number of rows (=time steps) in the data set
ndim = 2; % number of colums (=variables) in the data set
nens = 1; % number of ensemble members in the data set
nvb = [10 10]; % [1,ndim] array with number of equal-size bins for the value range of the data values of each variable
neb = 12; % number of equal-size bins for the value range of entropies
vals_min = 0; % minimum value (used to re-scale data values)
vals_max = 1; % maximum value (used to re-scale data values
% array with all time-slice widhts to be examined
% - size is [nss,1], with nss being total number of time slicing schemes to be examined
% - order is ascending, minimum possible value is 1, maximum possible value is nt
slice_widths = [1 2 4 8 16 32 64 128 256 512 1024 2048 nt]';
% create test data set (random normal)
dummy = 2*randn(nt,ndim); % create data set
vals = rescale(dummy,vals_min,vals_max); % normalize to [0,1] range for convenient binning
% create edges of value bins
% - [1,ndim] cell array, with a [1,nvb+1] array of bin edges for each variable
edges_vals = cell(1,ndim);
for i = 1 : ndim
edges_vals{i} = linspace(vals_min,vals_max,nvb(i)+1);
end
% create edges of entropy bins
% - [1,1] cell array, with a [1,neb+1] array of bin edges for the entropy values
% - the possible value range for entropy values is always [0,log2(total number of value bins for the entire ndim-dimensional space of values)]
tot_nvb = prod(cellfun(@length,edges_vals)-1); % total number of value bins for the entire ndim-dimensional space of values
edges_entropy = cell(1,1);
edges_entropy{1} = linspace(0,log2(tot_nvb),neb+1);
% calculate uncertainty and complexity
[uncs,comps,ns,all_uncs] = f_c_u_curve(vals, edges_vals, edges_entropy, slice_widths,0);
% calculate maximum and minimum possible uncertainty and complexity
% - this is useful to put actual uncertainties and complexities into perspective
% - maximum possible uncertainty is reached if the entire ndim-dimensional space of values
% is filled by a uniform distribution, whose entropy is log2(total number of value bins for the entire ndim-dimensional space of values)
% - maximum possible complexity is reached if the entropies of all time slices of a time slicing scheme are uniformly distributed.
% In that case, entropy is log2(number of entropy bins).
% - minimum possible value both for uncertainty and complexity is zero
tot_nvb = prod(cellfun(@length,edges_vals)-1); % total number of value bins for the entire ndim-dimensional space of values
max_possible_unc = log2(tot_nvb); % maximum possible uncertainty
max_possible_comp = log2(neb); % maximum posible complexity
min_possible_unc = 0; % minimum possible uncertainty
min_possible_comp = 0; % minimum posible complexity
%% Test data set: 2-d, 3-member ensemble
% settings
nt = 4096; % number of rows (=time steps) in the data set
ndim = 2; % number of colums (=variables) in the data set
nens = 3; % number of ensemble members in the data set
nvb = [10 10]; % [1,ndim] array with number of equal-size bins for the value range of the data values of each variable
neb = 12; % number of equal-size bins for the value range of entropies
vals_min = 0; % minimum value (used to re-scale data values)
vals_max = 1; % maximum value (used to re-scale data values
% array with all time-slice widhts to be examined
% - size is [nss,1], with nss being total number of time-slicing schemes to be examined
% - order is ascending, minimum possible value is 1, maximum possible value is nt
slice_widths = [1 2 4 8 16 32 64 128 256 512 1024 2048 nt]';
% create test data set (random normal)
dummy = 2*randn(nt,ndim,nens); % create data set
vals = rescale(dummy,vals_min,vals_max); % normalize to [0,1] range for convenient binning
% create edges of value bins
% - [1,ndim] cell array, with a [1,nvb+1] array of bin edges for each variable
edges_vals = cell(1,ndim);
for i = 1 : ndim
edges_vals{i} = linspace(vals_min,vals_max,nvb(i)+1);
end
% create edges of entropy bins
% - [1,1] cell array, with a [1,neb+1] array of bin edges for the entropy values
% - the possible value range for entropy values is always [0,log2(total number of value bins for the entire ndim-dimensional space of values)]
tot_nvb = prod(cellfun(@length,edges_vals)-1); % total number of value bins for the entire ndim-dimensional space of values
edges_entropy = cell(1,1);
edges_entropy{1} = linspace(0,log2(tot_nvb),neb+1);
% calculate uncertainty and complexity
[uncs,comps,ns,all_uncs] =f_c_u_curve(vals, edges_vals, edges_entropy, slice_widths,0);
% calculate maximum and minimum possible uncertainty and complexity
% - this is useful to put actual uncertainties and complexities into perspective
% - maximum possible uncertainty is reached if the entire ndim-dimensional space of values
% is filled by a uniform distribution, whose entropy is log2(total number of value bins for the entire ndim-dimensional space of values)
% - maximum possible complexity is reached if the entropies of all time slices of a time-slicing scheme are uniformly distributed.
% In that case, entropy is log2(number of entropy bins).
% - minimum possible value both for uncertainty and complexity is zero
tot_nvb = prod(cellfun(@length,edges_vals)-1); % total number of value bins for the entire ndim-dimensional space of values
max_possible_unc = log2(tot_nvb); % maximum possible uncertainty
max_possible_comp = log2(neb); % maximum posible complexity
min_possible_unc = 0; % minimum possible uncertainty
min_possible_comp = 0; % minimum posible complexity