diff --git a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml index bc7e8b5be..41363741a 100644 --- a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml @@ -77,19 +77,19 @@ builds: region: Africa title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Africa since pandemic start asia_1m: - subsampling_scheme: nextstrain_region_asia_grouped_by_division_1m + subsampling_scheme: nextstrain_region_asia_1m region: Asia title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Asia over the past month asia_2m: - subsampling_scheme: nextstrain_region_asia_grouped_by_division_2m + subsampling_scheme: nextstrain_region_asia_2m region: Asia title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Asia over the past 2 months asia_6m: - subsampling_scheme: nextstrain_region_asia_grouped_by_division_6m + subsampling_scheme: nextstrain_region_asia_6m region: Asia title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Asia over the past 6 months asia_all-time: - subsampling_scheme: nextstrain_region_asia_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_asia_all_time region: Asia title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Asia since pandemic start europe_1m: @@ -280,31 +280,18 @@ subsampling: exclude: "--exclude-where 'region={region}'" # Custom subsampling logic for region Asia over 1m - # Grouping by division - # Separating three buckets for China, India and elsewhere + # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - # 3:2:2 proportions of Asia, China, India - nextstrain_region_asia_grouped_by_division_1m: + nextstrain_region_asia_1m: # Early focal samples for Asia asia_early: - group_by: "division year month" - max_sequences: 300 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" - # Early focal samples for China - china_early: - group_by: "division year month" - max_sequences: 200 - max_date: "--max-date 1M" - exclude: "--exclude-where 'country!=China'" - # Early focal samples for India - india_early: - group_by: "division year month" - max_sequences: 200 + group_by: "country year month" + group_by_weights: "defaults/population_weights.tsv" + max_sequences: 700 max_date: "--max-date 1M" - exclude: "--exclude-where 'country!=India'" + exclude: "--exclude-where 'region!=Asia'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" @@ -313,22 +300,11 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Recent focal samples for Asia asia_recent: - group_by: "division week" - max_sequences: 1200 + group_by: "country week" + group_by_weights: "defaults/population_weights.tsv" + max_sequences: 2800 min_date: "--min-date 1M" - exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" - # Recent focal samples for China - china_recent: - group_by: "division week" - max_sequences: 800 - max_date: "--min-date 1M" - exclude: "--exclude-where 'country!=China'" - # Recent focal samples for India - india_recent: - group_by: "division week" - max_sequences: 800 - max_date: "--min-date 1M" - exclude: "--exclude-where 'country!=India'" + exclude: "--exclude-where 'region!=Asia'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" @@ -337,31 +313,18 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Custom subsampling logic for region Asia over 2m - # Grouping by division - # Separating three buckets for China, India and elsewhere + # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - # 3:2:2 proportions of Asia, China, India - nextstrain_region_asia_grouped_by_division_2m: + nextstrain_region_asia_2m: # Early focal samples for Asia asia_early: - group_by: "division year month" - max_sequences: 300 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" - # Early focal samples for China - china_early: - group_by: "division year month" - max_sequences: 200 - max_date: "--max-date 2M" - exclude: "--exclude-where 'country!=China'" - # Early focal samples for India - india_early: - group_by: "division year month" - max_sequences: 200 + group_by: "country year month" + group_by_weights: "defaults/population_weights.tsv" + max_sequences: 700 max_date: "--max-date 2M" - exclude: "--exclude-where 'country!=India'" + exclude: "--exclude-where 'region!=Asia'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" @@ -370,22 +333,11 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Recent focal samples for Asia asia_recent: - group_by: "division week" - max_sequences: 1200 + group_by: "country week" + group_by_weights: "defaults/population_weights.tsv" + max_sequences: 2800 min_date: "--min-date 2M" - exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" - # Recent focal samples for China - china_recent: - group_by: "division week" - max_sequences: 800 - max_date: "--min-date 2M" - exclude: "--exclude-where 'country!=China'" - # Recent focal samples for India - india_recent: - group_by: "division week" - max_sequences: 800 - max_date: "--min-date 2M" - exclude: "--exclude-where 'country!=India'" + exclude: "--exclude-where 'region!=Asia'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" @@ -394,31 +346,18 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Custom subsampling logic for region Asia over 6m - # Grouping by division - # Separating three buckets for China, India and elsewhere + # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - # 3:2:2 proportions of Asia, China, India - nextstrain_region_asia_grouped_by_division_6m: + nextstrain_region_asia_6m: # Early focal samples for Asia asia_early: - group_by: "division year month" - max_sequences: 300 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" - # Early focal samples for China - china_early: - group_by: "division year month" - max_sequences: 200 - max_date: "--max-date 6M" - exclude: "--exclude-where 'country!=China'" - # Early focal samples for India - india_early: - group_by: "division year month" - max_sequences: 200 + group_by: "country year month" + group_by_weights: "defaults/population_weights.tsv" + max_sequences: 700 max_date: "--max-date 6M" - exclude: "--exclude-where 'country!=India'" + exclude: "--exclude-where 'region!=Asia'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" @@ -427,22 +366,11 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Recent focal samples for Asia asia_recent: - group_by: "division year month" - max_sequences: 1200 + group_by: "country year month" + group_by_weights: "defaults/population_weights.tsv" + max_sequences: 2800 min_date: "--min-date 6M" - exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" - # Recent focal samples for China - china_recent: - group_by: "division year month" - max_sequences: 800 - max_date: "--min-date 6M" - exclude: "--exclude-where 'country!=China'" - # Recent focal samples for India - india_recent: - group_by: "division year month" - max_sequences: 800 - max_date: "--min-date 6M" - exclude: "--exclude-where 'country!=India'" + exclude: "--exclude-where 'region!=Asia'" # Early contextual samples from the rest of the world context_recent: group_by: "country year month" @@ -451,27 +379,16 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Custom subsampling logic for region Asia over all-time - # Grouping by division - # Separating three buckets for China, India and elsewhere + # Grouping by country weighted by population size # 4375 total # 4:1 ratio of focal to context - # 3:2:2 proportions of Asia, China, India - nextstrain_region_asia_grouped_by_division_all_time: + nextstrain_region_asia_all_time: # Focal samples for Asia asia: - group_by: "division year month" - max_sequences: 1500 - exclude: "--exclude-where 'region!=Asia' 'country=China' 'country=India'" - # Focal samples for China - china: - group_by: "division year month" - max_sequences: 1000 - exclude: "--exclude-where 'country!=China'" - # Focal samples for India - india: - group_by: "division year month" - max_sequences: 1000 - exclude: "--exclude-where 'country!=India'" + group_by: "country year month" + group_by_weights: "defaults/population_weights.tsv" + max_sequences: 3500 + exclude: "--exclude-where 'region!=Asia'" # Contextual samples from the rest of the world context: group_by: "country year month" diff --git a/nextstrain_profiles/nextstrain-gisaid/builds.yaml b/nextstrain_profiles/nextstrain-gisaid/builds.yaml index f13692a33..ab07dcc63 100644 --- a/nextstrain_profiles/nextstrain-gisaid/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid/builds.yaml @@ -70,19 +70,19 @@ builds: region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa since pandemic start asia_1m: - subsampling_scheme: nextstrain_region_asia_grouped_by_division_1m + subsampling_scheme: nextstrain_region_asia_1m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past month asia_2m: - subsampling_scheme: nextstrain_region_asia_grouped_by_division_2m + subsampling_scheme: nextstrain_region_asia_2m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past 2 months asia_6m: - subsampling_scheme: nextstrain_region_asia_grouped_by_division_6m + subsampling_scheme: nextstrain_region_asia_6m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past 6 months asia_all-time: - subsampling_scheme: nextstrain_region_asia_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_asia_all_time region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia since pandemic start europe_1m: @@ -272,12 +272,11 @@ subsampling: exclude: "--exclude-where 'region={region}'" # Custom subsampling logic for region Asia over 1m - # Grouping by division # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_grouped_by_division_1m: + nextstrain_region_asia_1m: # Early focal samples for Asia asia_early: group_by: "country year month" @@ -293,7 +292,7 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Recent focal samples for Asia asia_recent: - group_by: "country year month" + group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 1M" @@ -306,12 +305,11 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Custom subsampling logic for region Asia over 2m - # Grouping by division # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_grouped_by_division_2m: + nextstrain_region_asia_2m: # Early focal samples for Asia asia_early: group_by: "country year month" @@ -327,7 +325,7 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Recent focal samples for Asia asia_recent: - group_by: "country year month" + group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 2M" @@ -340,12 +338,11 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Custom subsampling logic for region Asia over 6m - # Grouping by division # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_grouped_by_division_6m: + nextstrain_region_asia_6m: # Early focal samples for Asia asia_early: group_by: "country year month" @@ -377,7 +374,7 @@ subsampling: # Grouping by country weighted by population size # 4375 total # 4:1 ratio of focal to context - nextstrain_region_asia_grouped_by_division_all_time: + nextstrain_region_asia_all_time: # Focal samples for Asia asia: group_by: "country year month" diff --git a/nextstrain_profiles/nextstrain-open/builds.yaml b/nextstrain_profiles/nextstrain-open/builds.yaml index d7709518b..e39f59da7 100644 --- a/nextstrain_profiles/nextstrain-open/builds.yaml +++ b/nextstrain_profiles/nextstrain-open/builds.yaml @@ -70,19 +70,19 @@ builds: region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa since pandemic start asia_1m: - subsampling_scheme: nextstrain_region_asia_grouped_by_division_1m + subsampling_scheme: nextstrain_region_asia_1m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past month asia_2m: - subsampling_scheme: nextstrain_region_asia_grouped_by_division_2m + subsampling_scheme: nextstrain_region_asia_2m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past 2 months asia_6m: - subsampling_scheme: nextstrain_region_asia_grouped_by_division_6m + subsampling_scheme: nextstrain_region_asia_6m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past 6 months asia_all-time: - subsampling_scheme: nextstrain_region_asia_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_asia_all_time region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia since pandemic start europe_1m: @@ -272,12 +272,11 @@ subsampling: exclude: "--exclude-where 'region={region}'" # Custom subsampling logic for region Asia over 1m - # Grouping by division # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_grouped_by_division_1m: + nextstrain_region_asia_1m: # Early focal samples for Asia asia_early: group_by: "country year month" @@ -293,7 +292,7 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Recent focal samples for Asia asia_recent: - group_by: "country year month" + group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 1M" @@ -306,12 +305,11 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Custom subsampling logic for region Asia over 2m - # Grouping by division # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_grouped_by_division_2m: + nextstrain_region_asia_2m: # Early focal samples for Asia asia_early: group_by: "country year month" @@ -327,7 +325,7 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Recent focal samples for Asia asia_recent: - group_by: "country year month" + group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 2M" @@ -340,12 +338,11 @@ subsampling: exclude: "--exclude-where 'region=Asia'" # Custom subsampling logic for region Asia over 6m - # Grouping by division # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_grouped_by_division_6m: + nextstrain_region_asia_6m: # Early focal samples for Asia asia_early: group_by: "country year month" @@ -377,7 +374,7 @@ subsampling: # Grouping by country weighted by population size # 4375 total # 4:1 ratio of focal to context - nextstrain_region_asia_grouped_by_division_all_time: + nextstrain_region_asia_all_time: # Focal samples for Asia asia: group_by: "country year month"