Update plot_substack_cc to use CC store (#153)
* Update plot_substack_cc to use CC store

* Support plotting stacks using StackStore
carlosgjs authored Jul 20, 2023
1 parent bc7e50e commit bb4d2ca
Showing 9 changed files with 170 additions and 102 deletions.
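A minimal usage sketch of the updated plot_substack_cc API introduced by this commit (the store directory, timespan, and filter band below are assumed for illustration and are not part of the commit):

    from datetime import datetime, timezone
    from datetimerange import DateTimeRange
    from noisepy.seis.asdfstore import ASDFCCStore
    from noisepy.seis.plotting_modules import plot_substack_cc

    # Hypothetical directory holding the cross-correlation HDF5 files written by S1
    cc_store = ASDFCCStore("./CCF")
    # Assumed time window to plot
    ts = DateTimeRange(datetime(2019, 2, 1, tzinfo=timezone.utc),
                       datetime(2019, 2, 2, tzinfo=timezone.utc))
    plot_substack_cc(cc_store, ts, freqmin=0.1, freqmax=1.0, disp_lag=100,
                     savefig=True, sdir="./plots")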
31 changes: 29 additions & 2 deletions src/noisepy/seis/asdfstore.py
@@ -49,7 +49,7 @@ def __getitem__(self, key: T) -> pyasdf.ASDFDataSet:
return _get_dataset(file_path, self.mode)

def get_keys(self) -> List[T]:
h5files = sorted(glob.glob(os.path.join(self.directory, "*.h5")))
h5files = sorted(glob.glob(os.path.join(self.directory, "**/*.h5"), recursive=True))
return list(map(self.parse_filename, h5files))

def mark_done(self, key: T):
@@ -190,7 +190,7 @@ def read(
class ASDFStackStore(StackStore):
def __init__(self, directory: str, mode: str = "a"):
super().__init__()
self.datasets = ASDFDirectory(directory, mode, _filename_from_stations, parse_station_pair)
self.datasets = ASDFDirectory(directory, mode, _filename_from_stations, _parse_station_pair_h5file)

def mark_done(self, src: Station, rec: Station):
self.datasets.mark_done((src, rec))
@@ -203,6 +203,28 @@ def append(
):
self.datasets.add_aux_data((src, rec), stack_params, name, components, data)

def get_station_pairs(self) -> List[Tuple[Station, Station]]:
return self.datasets.get_keys()

def get_stack_names(self, src: Station, rec: Station) -> List[str]:
with self.datasets[(src, rec)] as ds:
return [name for name in ds.auxiliary_data.list() if name != PROGRESS_DATATYPE]

def get_components(self, src: Station, rec: Station, name: str) -> List[str]:
with self.datasets[(src, rec)] as ds:
if name not in ds.auxiliary_data:
logger.warning(f"Not data available for {src}_{rec}/{name}")
return []
return ds.auxiliary_data[name].list()

def read(self, src: Station, rec: Station, component: str, name: str) -> Tuple[Dict[str, Any], np.ndarray]:
with self.datasets[(src, rec)] as ds:
if name not in ds.auxiliary_data or component not in ds.auxiliary_data[name]:
logger.warning(f"Not data available for {src}_{rec}/{name}/{component}")
return ({}, np.empty(0))
stream = ds.auxiliary_data[name][component]
return (stream.parameters, stream.data[:])


def _get_dataset(filename: str, mode: str) -> pyasdf.ASDFDataSet:
logger.debug(f"ASDFCCStore - Opening {filename}")
@@ -221,3 +243,8 @@ def _filename_from_stations(pair: Tuple[Station, Station]) -> str:

def _filename_from_timespan(timespan: DateTimeRange) -> str:
return f"{timespan_str(timespan)}.h5"


def _parse_station_pair_h5file(path: str) -> Tuple[Station, Station]:
pair = Path(path).stem
return parse_station_pair(pair)
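For reference, a sketch of how the new ASDFStackStore read methods added above might be exercised (the stack directory path is hypothetical):

    from noisepy.seis.asdfstore import ASDFStackStore

    stack_store = ASDFStackStore("./STACK", mode="r")  # assumed directory of stacked .h5 files
    for src, rec in stack_store.get_station_pairs():
        for name in stack_store.get_stack_names(src, rec):
            for comp in stack_store.get_components(src, rec, name):
                params, data = stack_store.read(src, rec, comp, name)
                print(src, rec, name, comp, data.shape)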
128 changes: 63 additions & 65 deletions src/noisepy/seis/plotting_modules.py
@@ -6,12 +6,14 @@
import obspy
import pyasdf
import scipy
from datetimerange import DateTimeRange
from obspy.signal.filter import bandpass
from scipy.fftpack import next_fast_len

from noisepy.seis.constants import PROGRESS_DATATYPE
from noisepy.seis.stores import CrossCorrelationDataStore, StackStore

logging.getLogger("matplotlib.font_manager").disabled = True
logger = logging.getLogger(__name__)

"""
Ensembles of plotting functions to display intermediate/final waveforms from the NoisePy package.
@@ -119,42 +121,51 @@ def plot_waveform(sfile, net, sta, freqmin, freqmax, savefig=False, sdir=None):
#############################################################################


def plot_substack_cc(sfile, freqmin, freqmax, disp_lag=None, savefig=True, sdir="./"):
def plot_substack_cc(
cc_store: CrossCorrelationDataStore,
ts: DateTimeRange,
freqmin,
freqmax,
disp_lag=None,
savefig=True,
sdir="./",
):
"""
Display the 2D matrix of the cross-correlation functions for a certain time chunk.
PARAMETERS:
--------------------------
sfile: cross-correlation functions outputed by S1
cc_store: Store to read CC data from
ts: Timespan to plot
freqmin: min frequency to be filtered
freqmax: max frequency to be filtered
disp_lag: time ranges for display
USAGE:
--------------------------
plot_substack_cc('temp.h5',0.1,1,100,True,'./')
savefig: Whether to save the figures as a PDF on disk
sdir: Save directory
Note: IMPORTANT!!!! this script only works for cross-correlation with sub-stacks being set to True in S1.
"""
# open data for read
if savefig:
if sdir is None:
print("no path selected! save figures in the default path")
raise ValueError("sdir argument must be provided if savefig=True")

try:
ds = pyasdf.ASDFDataSet(sfile, mode="r")
# extract common variables
spairs = ds.auxiliary_data.list()
spairs = list(filter(lambda x: x != PROGRESS_DATATYPE, spairs))
path_lists = ds.auxiliary_data[spairs[0]].list()
flag = ds.auxiliary_data[spairs[0]][path_lists[0]].parameters["substack"]
dt = ds.auxiliary_data[spairs[0]][path_lists[0]].parameters["dt"]
maxlag = ds.auxiliary_data[spairs[0]][path_lists[0]].parameters["maxlag"]
except Exception:
raise Exception("exit! cannot open %s to read" % sfile)
sta_pairs = cc_store.get_station_pairs()
if len(sta_pairs) == 0:
logger.error("No data available for plotting")
return

ch_pairs = cc_store.get_channeltype_pairs(ts, sta_pairs[0][0], sta_pairs[0][1])
if len(ch_pairs) == 0:
logger.error(f"No data available for plotting in {ts}/{sta_pairs[0]}")
return

# Read some common arguments from the first available data set:
params, _ = cc_store.read(ts, sta_pairs[0][0], sta_pairs[0][1], ch_pairs[0][0], ch_pairs[0][1])
substack_flag, dt, maxlag = (params[p] for p in ["substack", "dt", "maxlag"])

# only works for cross-correlation with substacks generated
if not flag:
if not substack_flag:
raise ValueError("seems no substacks have been done! not suitable for this plotting function")

# lags for display
@@ -169,23 +180,20 @@ def plot_substack_cc(sfile, freqmin, freqmax, disp_lag=None, savefig=True, sdir=
indx1 = int((maxlag - disp_lag) / dt)
indx2 = indx1 + 2 * int(disp_lag / dt) + 1

for spair in spairs:
ttr = spair.split("_")
net1, sta1 = ttr[0].split(".")
net2, sta2 = ttr[1].split(".")
for ipath in path_lists:
chan1, chan2 = ipath.split("_")
for src_sta, rec_sta in sta_pairs:
ch_pairs = cc_store.get_channeltype_pairs(ts, src_sta, rec_sta)
for src_cha, rec_cha in ch_pairs:
try:
dist = ds.auxiliary_data[spair][ipath].parameters["dist"]
ngood = ds.auxiliary_data[spair][ipath].parameters["ngood"]
ttime = ds.auxiliary_data[spair][ipath].parameters["time"]
params, all_data = cc_store.read(ts, src_sta, rec_sta, src_cha, rec_cha)
dist, ngood, ttime = (params[p] for p in ["dist", "ngood", "time"])
timestamp = np.empty(ttime.size, dtype="datetime64[s]")
except Exception:
print("continue! something wrong with %s %s" % (spair, ipath))
logger.warning(f"continue! something wrong with {src_sta}_{rec_sta}/{src_cha}_{rec_cha}")
continue

# cc matrix
data = ds.auxiliary_data[spair][ipath].data[:, indx1:indx2]
data = all_data[:, indx1:indx2]
nwin = data.shape[0]
amax = np.zeros(nwin, dtype=np.float32)
if nwin == 0 or len(ngood) == 1:
@@ -216,7 +224,7 @@ def plot_substack_cc(sfile, freqmin, freqmax, disp_lag=None, savefig=True, sdir=
extent=[-disp_lag, disp_lag, nwin, 0],
aspect="auto",
)
ax1.set_title("%s.%s.%s %s.%s.%s dist:%5.2fkm" % (net1, sta1, chan1, net2, sta2, chan2, dist))
ax1.set_title(f"{src_sta}.{src_cha} {rec_sta}.{rec_cha} dist:{dist:5.2f}km")
ax1.set_xlabel("time [s]")
ax1.set_xticks(t)
ax1.set_yticks(np.arange(0, nwin, step=tick_inc))
@@ -234,13 +242,9 @@ def plot_substack_cc(sfile, freqmin, freqmax, disp_lag=None, savefig=True, sdir=

# save figure or just show
if savefig:
if sdir is None:
sdir = sfile.split(".")[0]
if not os.path.isdir(sdir):
os.mkdir(sdir)
outfname = sdir + "/{0:s}.{1:s}.{2:s}_{3:s}.{4:s}.{5:s}.pdf".format(
net1, sta1, chan1, net2, sta2, chan2
)
outfname = os.path.join(sdir, f"{src_sta}.{src_cha}_{rec_sta}.{rec_cha}.pdf")
fig.savefig(outfname, format="pdf", dpi=400)
plt.close()
else:
@@ -625,8 +629,8 @@ def plot_substack_all_spect(


def plot_all_moveout(
sfiles,
dtype,
store: StackStore,
stack_name,
freqmin,
freqmax,
ccomp,
@@ -640,8 +644,8 @@ def plot_all_moveout(
PARAMETERS:
---------------------
sfile: cross-correlation functions outputed by S2
dtype: datatype either 'Allstack0pws' or 'Allstack0linear'
store: StackStore to read the stacked data from
stack_name: name of the stack to plot, either 'Allstack0pws' or 'Allstack0linear'
freqmin: min frequency to be filtered
freqmax: max frequency to be filtered
ccomp: cross component
@@ -657,18 +661,20 @@ def plot_all_moveout(
# open data for read
if savefig:
if sdir is None:
print("no path selected! save figures in the default path")
raise ValueError("sdir argument must be provided if savefig=True")

path = ccomp
sta_pairs = store.get_station_pairs()
if len(sta_pairs) == 0:
logger.error("No data available for plotting")
return

# extract common variables
try:
ds = pyasdf.ASDFDataSet(sfiles[0], mode="r")
dt = ds.auxiliary_data[dtype][path].parameters["dt"]
maxlag = ds.auxiliary_data[dtype][path].parameters["maxlag"]
stack_method = dtype.split("0")[-1]
except Exception:
raise Exception("exit! cannot open %s to read" % sfiles[0])
# Read some common arguments from the first available data set:
params, dtmp = store.read(sta_pairs[0][0], sta_pairs[0][1], ccomp, stack_name)
if len(params) == 0 or dtmp.size == 0:
logger.error(f"No data available for plotting {stack_name}/{ccomp}")
return

dt, maxlag = (params[p] for p in ["dt", "maxlag"])
stack_method = stack_name.split("0")[-1]

# lags for display
if not disp_lag:
@@ -680,26 +686,18 @@ def plot_all_moveout(
indx2 = indx1 + 2 * int(disp_lag / dt) + 1

# cc matrix
nwin = len(sfiles)
nwin = len(sta_pairs)
data = np.zeros(shape=(nwin, indx2 - indx1), dtype=np.float32)
dist = np.zeros(nwin, dtype=np.float32)
ngood = np.zeros(nwin, dtype=np.int16)

# load cc and parameter matrix
for ii in range(len(sfiles)):
sfile = sfiles[ii]

ds = pyasdf.ASDFDataSet(sfile, mode="r")
try:
# load data to variables
dist[ii] = ds.auxiliary_data[dtype][path].parameters["dist"]
ngood[ii] = ds.auxiliary_data[dtype][path].parameters["ngood"]
tdata = ds.auxiliary_data[dtype][path].data[indx1:indx2]
except Exception as e:
print(f"continue! cannot read {sfile}: {e}")
continue
for ii, (src, rec) in enumerate(sta_pairs):
params, all_data = store.read(src, rec, ccomp, stack_name)
dist[ii] = params["dist"]
ngood[ii] = params["ngood"]

data[ii] = bandpass(tdata, freqmin, freqmax, int(1 / dt), corners=4, zerophase=True)
data[ii] = bandpass(all_data[indx1:indx2], freqmin, freqmax, int(1 / dt), corners=4, zerophase=True)

# average cc
ntrace = int(np.round(np.max(dist) + 0.51) / dist_inc)
20 changes: 18 additions & 2 deletions src/noisepy/seis/stores.py
@@ -97,7 +97,7 @@ def _get_channel_pair(self, src_chan: ChannelType, rec_chan: ChannelType) -> str:

class StackStore:
"""
A class for writing stack data
A class for reading and writing stack data
"""

@abstractmethod
@@ -114,13 +114,29 @@ def append(
):
pass

@abstractmethod
def get_station_pairs(self) -> List[Tuple[Station, Station]]:
pass

@abstractmethod
def get_stack_names(self, src: Station, rec: Station) -> List[str]:
pass

@abstractmethod
def get_components(self, src: Station, rec: Station, name: str) -> List[str]:
pass

@abstractmethod
def read(self, src: Station, rec: Station, component: str, name: str) -> Tuple[Dict[str, Any], np.ndarray]:
pass


def timespan_str(timespan: DateTimeRange) -> str:
return f"{timespan.start_datetime.strftime(DATE_FORMAT)}T{timespan.end_datetime.strftime(DATE_FORMAT)}"


def parse_station_pair(pair: str) -> Tuple[Station, Station]:
# Parse from:'CI.ARV_CI.BAK
# Parse from: CI.ARV_CI.BAK
def station(sta: str) -> Station:
net, name = sta.split(".")
return Station(net, name)
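A small sketch of the pair-key convention handled by parse_station_pair (the import path assumes the function remains public in noisepy.seis.stores):

    from noisepy.seis.stores import parse_station_pair

    # Keys follow the "NET.STA_NET.STA" convention noted in the comment above
    src, rec = parse_station_pair("CI.ARV_CI.BAK")
    print(src, rec)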
10 changes: 1 addition & 9 deletions tests/test_asdfstore.py
@@ -1,9 +1,8 @@
from datetime import datetime
from pathlib import Path

import pytest

from noisepy.seis.asdfstore import ASDFCCStore, ASDFRawDataStore
from noisepy.seis.asdfstore import ASDFRawDataStore


@pytest.fixture
@@ -13,13 +12,6 @@ def store():
return ASDFRawDataStore(os.path.join(os.path.dirname(__file__), "./data"))


# Use the built in tmp_path fixture: https://docs.pytest.org/en/7.1.x/how-to/tmp_path.html
# to create a CC store
@pytest.fixture
def ccstore(tmp_path: Path) -> ASDFCCStore:
return ASDFCCStore(str(tmp_path))


def test_get_timespans(store: ASDFRawDataStore):
ts = store.get_timespans()
assert len(ts) == 1
2 changes: 2 additions & 0 deletions tests/test_ccstores.py
@@ -11,6 +11,8 @@
from noisepy.seis.zarrstore import ZarrCCStore


# Use the built in tmp_path fixture: https://docs.pytest.org/en/7.1.x/how-to/tmp_path.html
# to create CC stores
@pytest.fixture
def asdfstore(tmp_path: Path) -> ASDFCCStore:
return ASDFCCStore(str(tmp_path))
