Commit
Dealing with long occurrences (i.e., skip them if they are longer than 90 days and write exception.token)
julemai committed Oct 1, 2022
1 parent e63de68 commit f125681
Showing 2 changed files with 63 additions and 3 deletions.
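
The changes below make analyse_occurrence.py skip any occurrence longer than 90 days and record that decision in an exception.token file, which plot_analyse_occurrence_all.py then reads back to report the skipped occurrences. A minimal sketch of that round trip (the message layout is copied from the diff below; the UUID and values are the hypothetical ones from its example comment):

# write side (analyse_occurrence.py): one line describing the skipped occurrence
msg = "analyse_occurrence: Occurrence {} (idx={}) not processed because it is too long ({} days)".format(
    "2c42d6b6-ae77-47c3-9fb5-0e4dde8e6719", 212, 1151.0)

# read side (plot_analyse_occurrence_all.py): recover the three fields
uuid = msg.split(' ')[2]
idx = int(msg.split('=')[1].split(')')[0])
length_occurrence = float(msg.split('(')[2].split(' ')[0])
assert (uuid, idx, length_occurrence) == ("2c42d6b6-ae77-47c3-9fb5-0e4dde8e6719", 212, 1151.0)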
27 changes: 24 additions & 3 deletions doc/figures/plot_analyse_occurrence_all.py
@@ -48,6 +48,7 @@

results = {}
nfiles = 0
+too_long = []
for zipfile in zipfiles:

    print("Read data from {}".format(zipfile))
@@ -67,8 +68,23 @@
# print("jsonfile = ",jsonfile)

if len(jsonfile) != 1:
if len(jsonfile) == 0:
print("Event {} probably still processing. Skip for now.".format(str(unzippedfoldername)))
exceptionfile = glob.glob(str(unzippedfoldername)+'/exception.token')
if len(exceptionfile) == 1:
print("Occurrence {} not analysed because too long.".format(str(unzippedfoldername)))
ff = open(str(exceptionfile[0]), "r")
content = ff.read()
ff.close()
# analyse_occurrence: Occurrence 2c42d6b6-ae77-47c3-9fb5-0e4dde8e6719 (idx=212) not processed because it is too long (1151.0 days)
uuid = content.split(' ')[2]
idx = int(content.split('=')[1].split(')')[0])
length_occurrence = float(content.split('(')[2].split(' ')[0])
too_long.append({'uuid':uuid,'idx':idx,'length':length_occurrence})
# remove unzipped files and folder if they were created here
if unpacked:
shutil.rmtree(unzippedfoldername)
continue
elif len(jsonfile) == 0:
print("Occurrence {} probably still processing. Skip for now.".format(str(unzippedfoldername)))
# remove unzipped files and folder if they were created here
if unpacked:
shutil.rmtree(unzippedfoldername)
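
The split()-based parsing above depends on the exact wording of the token message; as a hedged alternative (a sketch assuming only the message format shown in the comment above), the three fields could be pulled out with a regular expression from the standard library:

import re

# matches "... Occurrence <uuid> (idx=<n>) ... too long (<days> days)"
match = re.search(r"Occurrence (\S+) \(idx=(\d+)\).*\(([\d.]+) days\)", content)
if match:
    uuid = match.group(1)
    idx = int(match.group(2))
    length_occurrence = float(match.group(3))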
@@ -182,13 +198,18 @@
    np.sum(available_timesteps_n)+np.sum(missing_timesteps_n),
    np.sum(missing_timesteps_n)/(np.sum(available_timesteps_n)+np.sum(missing_timesteps_n))*100.))
print("\\item precipitation sum below 10~mm for {} of {} features".format(len(precip_small),len(features_idx)))
-print("\\item precipitation sum above 1000~mm for {} of {} features \\\\(all have multi-year period specified in HFE database)\\\\".format(len(precip_large),len(features_idx)))
+print("\\item precipitation sum above 1000~mm for {} of {} features \\\\".format(len(precip_large),len(features_idx)))
for ii in precip_large:
    print("  {{\\scriptsize UUID: {} $\curvearrowright$ Start and end date = [{},{}]}}\\\\[-4pt]".format(
        #features_idx[ii],
        results[features_idx[ii]]['uuid'],
        results[features_idx[ii]]['results']['start_date_w_buffer'],
        results[features_idx[ii]]['results']['end_date_w_buffer']))
+print("\\item in total {} occurrences not analysed because too long\\\\".format(len(too_long)))
+for itoo_long in too_long:
+    print("  {{\\scriptsize UUID: {} $\curvearrowright$ Length = {} [days]}}\\\\[-4pt]".format(
+        itoo_long['uuid'],
+        itoo_long['length']))
print("\\item no precipitation event found for {} of {} features\\\\".format(len(no_precip_event_found),len(features_idx)))
for ii in no_precip_event_found:
    print("  {{\\scriptsize UUID: {} $\curvearrowright$ Start and end date = [{},{}]}}\\\\[-4pt]".format(
39 changes: 39 additions & 0 deletions src/analyse_occurrence.py
@@ -172,6 +172,9 @@ def analyse_occurrence(ifeatures=None,tmpdir='/tmp/',bbox_buffer=0.5,dates_buffe
    # # all GEOMET
    # ifeatures = [2, 3, 4, 6, 38, 119, 123, 127, 137, 138, 139, 141, 142, 152, 220, 229, 367, 389, 405, 490, 510, 515, 516, 553, 560, 643, 838, 872, 876, 877, 882, 884, 894, 899, 902, 903, 909, 911, 916, 917, 942, 956, 964, 970, 972, 974, 980, 981, 1032, 1037, 1039, 1046, 1085, 1106, 1116, 1117, 1118, 1141, 1149, 1155, 1159, 1170, 1173, 1180, 1184, 1201, 1202, 1236, 1243, 1263, 1274, 1310, 1311, 1312, 1313, 1314, 1315, 1317, 1332, 1338, 1345, 1346, 1361, 1366, 1445, 1455, 1481, 1493, 1506, 1518, 1529, 1536, 1538, 1547, 1548, 1549, 1550, 1555, 1562, 1591, 1613, 1625, 1639, 1645, 1662, 1680, 1682, 1730, 1731, 1732, 1735, 1742, 1763, 1765, 1778, 1780, 1781, 1797, 1800, 1841, 1843, 1846, 1853, 1864, 1889]

+    # # all too long
+    # ifeatures = [921, 926, 927, 931, 1083, 1869]

    # --------------------
    # Load HFE database (occurrences)
    # --------------------
@@ -230,6 +233,39 @@ def analyse_occurrence(ifeatures=None,tmpdir='/tmp/',bbox_buffer=0.5,dates_buffe
            feature['properties']['flood_cause'],
            ))

+        # --------------------
+        # Make sure occurrence is not super long (> 90. days) which would take a very long time to process
+        # --------------------
+        if not(feature['properties']['end_date'] is None):
+            length_occurrence = (end_date-start_date).days+(end_date-start_date).seconds/60./60./24.
+            if (length_occurrence > 90.):
+                print("analyse_occurrence: Occurrence will NOT be analysed because it is TOO LONG:")
+                print("   >>> Length occurrence {} (idx={}): {} [days]".format(
+                    feature['properties']['uuid'],
+                    ifeature,
+                    length_occurrence))
+                result['png-ts'].append( [] )
+                result['png'].append( [] )
+                result['gif'].append( [] )
+                result['legend'].append( [] )
+                result['json'].append( [] )

+                # save a token file to note that this won't be processed
+                tokenfile = str(Path(tmpdir+'/analyse_occurrence_'+str(ifeature)+'/exception.token'))

+                # make sure folder to store file exists; otherwise create
+                Path(tokenfile).parent.mkdir(parents=True, exist_ok=True)

+                # write the exception message to the token file
+                ff = open(tokenfile, "w")
+                ff.write("analyse_occurrence: Occurrence {} (idx={}) not processed because it is too long ({} days)".format(
+                    feature['properties']['uuid'],
+                    ifeature,
+                    length_occurrence))
+                ff.close()

+                continue

        # --------------------
        # Determine bounding box
        # --------------------
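
For reference, the length computed above from .days and .seconds is equivalent (up to microseconds) to timedelta.total_seconds()/86400.; a self-contained sketch with hypothetical dates:

from datetime import datetime

start_date = datetime(2017, 4, 1, 6, 0)    # hypothetical occurrence start
end_date   = datetime(2017, 7, 30, 18, 0)  # hypothetical occurrence end

delta = end_date - start_date
length_occurrence = delta.days + delta.seconds/60./60./24.    # 120.5 days
assert abs(length_occurrence - delta.total_seconds()/86400.) < 1e-9
print(length_occurrence > 90.)    # True -> this occurrence would be skipped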
@@ -685,3 +721,6 @@ def find_names_of_occurrences(event_feature,data_hfe_occur,locations):

# for example, run for all Geomet features:
# python analyse_occurrence.py --ifeatures "2, 3, 4, 6, 38, 119, 123, 127, 137, 138, 139, 141, 142, 152, 220, 229, 367, 389, 405, 490, 510, 515, 516, 553, 560, 643, 838, 872, 876, 877, 882, 884, 894, 899, 902, 903, 909, 911, 916, 917, 942, 956, 964, 970, 972, 974, 980, 981, 1032, 1037, 1039, 1046, 1085, 1106, 1116, 1117, 1118, 1141, 1149, 1155, 1159, 1170, 1173, 1180, 1184, 1201, 1202, 1236, 1243, 1263, 1274, 1310, 1311, 1312, 1313, 1314, 1315, 1317, 1332, 1338, 1345, 1346, 1361, 1366, 1445, 1455, 1481, 1493, 1506, 1518, 1529, 1536, 1538, 1547, 1548, 1549, 1550, 1555, 1562, 1591, 1613, 1625, 1639, 1645, 1662, 1680, 1682, 1730, 1731, 1732, 1735, 1742, 1763, 1765, 1778, 1780, 1781, 1797, 1800, 1841, 1843, 1846, 1853, 1864, 1889" --bbox_buffer 0.5 --dates_buffer 5.0,0.0 --tmpdir "/project/6070465/julemai/nrcan-hfe/data/output/"

+# for example, run for all features that are too long:
+# python analyse_occurrence.py --ifeatures "921, 926, 927, 931, 1083, 1869" --bbox_buffer 0.5 --dates_buffer 5.0,0.0 --tmpdir "/project/6070465/julemai/nrcan-hfe/data/output/"
