-
Notifications
You must be signed in to change notification settings - Fork 0
/
data-clean.txt
486 lines (468 loc) · 46.5 KB
/
data-clean.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
R version 3.4.4 (2018-03-15) -- "Someone to Lean On"
Copyright (C) 2018 The R Foundation for Statistical Computing
Platform: x86_64-apple-darwin15.6.0 (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
Natural language support but running in an English locale
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
[Previously saved workspace restored]
> # Introduction ------------------------------------------------------------
> #
> # 2018-02-14: The data supplied by AP were inconsistent with the database
> # entries. This was due in part to the fact that some isolates had
> # their identifiers changed in the Steadman lab database and others
> # had perished after genotyping and were discarded. We now have two
> # data sets that we will use to update the data to use for the
> # analysis.
> #
> # Setup -------------------------------------------------------------------
>
>
> library("poppr")
Loading required package: adegenet
Loading required package: ade4
/// adegenet 2.1.1 is loaded ////////////
> overview: '?adegenet'
> tutorials/doc/questions: 'adegenetWeb()'
> bug reports/feature requests: adegenetIssues()
This is poppr version 2.7.1. To get started, type package?poppr
OMP parallel support: available
> library("tidyverse")
── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1 ✔ purrr  0.2.4
✔ tibble  1.4.2 ✔ dplyr  0.7.4
✔ tidyr  0.8.0 ✔ stringr 1.3.0
✔ readr  1.1.1 ✔ forcats 0.3.0
── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
> library("readxl")
> library("lubridate")
Attaching package: ‘lubridate’
The following object is masked from ‘package:base’:
date
> if (!interactive()) options(width = 200)
> enc <- getOption("encoding")
> options(encoding = "iso-8859-1")
>
>
> # Merging Data ------------------------------------------------------------
>
> genotypes <- read.genalex(here::here("data/data.csv"), ploidy = 1) %>%
+ genind2df() %>%
+ rownames_to_column("AP-GenoID") %>%
+ as_tibble()
> genotypes
# A tibble: 95 x 13
`AP-GenoID` pop `5-2` `6-2` `7-2` `8-3` `9-2` `12-2` `20-3` `55-4` `110-4` `114-4` `17-3`
<chr> <fct> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
 1 1 South America_Brazil_Góias 336 505 NA 256 371 228 283 207 381 367 354
 2 1010 South America_Brazil_Paraná 340 505 172 264 360 230 292 223 NA 351 360
 3 1011 South America_Brazil_Paraná 332 505 174 264 371 230 292 207 385 367 360
 4 1012 South America_Brazil_Paraná 334 505 174 268 371 228 292 207 381 351 360
 5 1013 South America_Brazil_Paraná 332 505 182 268 375 228 292 207 381 351 360
 6 1014 South America_Brazil_Paraná 332 495 182 268 388 228 292 207 381 367 360
 7 1015 South America_Brazil_Góias 332 505 174 264 371 228 292 207 385 351 354
 8 1016 South America_Brazil_Góias 332 505 174 264 370 230 292 183 385 351 354
 9 1017 South America_Brazil_Paraná 332 505 174 264 388 230 292 207 NA 351 360
10 1018 South America_Brazil_Góias 332 505 174 264 371 230 NA 207 385 351 360
# ... with 85 more rows
>
> # Reading in the Excel sheet has its own problems since the date column contains
> # part dates and part text and they get screwed up no matter what you do. The
> # way I've dealt with this: import everything as text, parse what looks like a
> # date, and then treat whatever didn't parse as a number of days since
> # 1899-12-30.
> metadata <- read_excel(here::here("data/MasterGenoMCGDataBrazilPaper2018.xlsx"),
+ col_types = "text",
+ na = c("NA", "")) %>%
+ mutate(date = as.Date(parse_date_time(`JRS-Collection Date`, c("mdy", "y")))) %>%
+ mutate(date = case_when(
+ is.na(date) ~ as.Date("1899-12-30") + days(as.integer(`JRS-Collection Date`)),
+ TRUE ~ date
+ ))
Warning messages:
1: 11 failed to parse.
2: In period(day = x) : NAs introduced by coercion
> metadata
[38;5;246m# A tibble: 95 x 11[39m
MCG `AP-GenoID` InventoryID `in-JRS-collection` `AP-Continent_Country_Population` `JRS-Isolate #` `JRS-Collection Date` `JRS-Source (Host)` `JRS-Geographical Location` `JRS-Notes` date
[3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<date>[39m[23m
[38;5;250m 1[39m A 143 143 TRUE North America_United States_Nebraska 143 1977 Soybeans Greeley, CO [38;5;246m"[39mCalled \"C[38;5;246m[39m… 1977-01-01
[38;5;250m 2[39m K 202 202 TRUE North America_United States_Nebraska 202 33117 Soybean Ithaca, NE Hobbit 1990-09-01
[38;5;250m 3[39m K 264 264 TRUE North America_United States_Nebraska 264 34516 Soybeans Platte Co., NE 0 1994-07-01
[38;5;250m 4[39m H 265 265 TRUE North America_United States_Nebraska 265 34547 Soybeans Tekamah, Burt Co., NE 0 1994-08-01
[38;5;250m 5[39m A 266 266 TRUE North America_United States_Nebraska 266 34547 Soybeans Saunders Co., NE 0 1994-08-01
[38;5;250m 6[39m I 267 267 TRUE North America_United States_Nebraska 267 34578 Soybeans Bellwood, NE ICI-297 1994-09-01
[38;5;250m 7[39m J 268 268 TRUE North America_United States_Nebraska 268 34578 Soybeans Herman, NE NK-2939 1994-09-01
[38;5;250m 8[39m A 276 276 TRUE North America_United States_Nebraska 276 35309 Soybean Ord, NE 30 scleroti… 1996-09-01
[38;5;250m 9[39m K 289 289 TRUE North America_United States_Nebraska 289 1996 Soybeans UNL PN, Lincoln, NE [38;5;246m"[39mFrom Les C[38;5;246m[39m… 1996-01-01
[38;5;250m10[39m C 293A 293 FALSE South America_Argentina_Argentina 293 1996 Soybean Cull Argentina Juan C. Sua… 1996-01-01
[38;5;246m# ... with 85 more rows[39m
> full_data <- left_join(metadata, genotypes, by = "AP-GenoID")
>
>
> # Data Cleaning -----------------------------------------------------------
> #
> # Because there are discrepancies between the locations, we will rely on the
> # location information from the JRS database. Unfortunately, there is no
> # consistent pattern in naming, so we will manually create a table from the
> # data and use that to match genotypes. Those without region names will have the
> # country or state name in place.
>
>
> full_data %>%
+ select(`JRS-Geographical Location`, pop) %>%
+ distinct() %>%
+ mutate(reglen = max(nchar(`JRS-Geographical Location`), na.rm = TRUE) - nchar(`JRS-Geographical Location`)) %>%
+ filter(!is.na(`JRS-Geographical Location`)) %>%
+ rowwise() %>%
+ mutate(trail = paste(rep(" ", reglen), collapse = "")) %>%
+ glue::glue_data("'{`JRS-Geographical Location`}'{trail} ~ '{pop}',")
'Greeley, CO' ~ 'North America_United States_Nebraska',
'Ithaca, NE' ~ 'North America_United States_Nebraska',
'Platte Co., NE' ~ 'North America_United States_Nebraska',
'Tekamah, Burt Co., NE' ~ 'North America_United States_Nebraska',
'Saunders Co., NE' ~ 'North America_United States_Nebraska',
'Bellwood, NE' ~ 'North America_United States_Nebraska',
'Herman, NE' ~ 'North America_United States_Nebraska',
'Ord, NE' ~ 'North America_United States_Nebraska',
'UNL PN, Lincoln, NE' ~ 'North America_United States_Nebraska',
'Argentina' ~ 'South America_Argentina_Argentina',
'Mead, Nebraska' ~ 'North America_United States_Nebraska',
'Ewing, NE' ~ 'North America_United States_Nebraska',
'Auburn, NE' ~ 'North America_United States_Nebraska',
'Rio Verde/GO, Brazil' ~ 'South America_Brazil_Góias',
'Campo Mourão/PR, Brazil' ~ 'South America_Brazil_Paraná',
'São Miguel do Passo Quatro/GO' ~ 'South America_Brazil_Góias',
'Pinhão/PR, Brazil' ~ 'South America_Brazil_Góias',
'Formoso, GO, Brazil' ~ 'South America_Brazil_Paraná',
'Guarapuava, PR, Brazil' ~ 'South America_Brazil_Góias',
'Luiz Eduardo Magalhães/BA, Brazil' ~ 'South America_Brazil_Bahia',
'Pinhão/PR, Brazil' ~ 'South America_Brazil_Paraná',
'Mauá da Serra/PR, Brazil' ~ 'South America_Brazil_Paraná',
'Nᾶo me Toque/Rio Grande do Sul, Brazil' ~ 'South America_Brazil_Rio Grande do Sul',
'Sᾶo Desidério/Bahia, Brazil' ~ 'South America_Brazil_Bahia',
'Jataí/GO, Brazil' ~ 'South America_Brazil_Góias',
'Cristalina/GO, Brazil' ~ 'South America_Brazil_Góias',
'Formosa/GO, Brazil' ~ 'South America_Brazil_Góias',
'Sudeste/GO, Brazil' ~ 'South America_Brazil_Góias',
'Uberlândia/MG, Brazil' ~ 'South America_Brazil_Minas Gerias',
'Correntina/BA, Brazil' ~ 'South America_Brazil_Bahia',
'Bahia, Brazil' ~ 'South America_Brazil_Bahia',
'Vacaria/RS, Brazil' ~ 'South America_Brazil_Rio Grande do Sul',
'Coxilha/RS, Brazil' ~ 'South America_Brazil_Rio Grande do Sul',
'Faxinal/PR, Brazil' ~ 'South America_Brazil_Paraná',
'Chapadão do Sul/MS, Brazil' ~ 'South America_Brazil_Mato Grosso do Sul',
>
> full_data <- mutate(full_data,
+ continent_country_state_region = case_when(
+ # Because there is only one point from CO in the data, we will compress both
+ # CO and NE into a single region called "Midwest"
+ `JRS-Geographical Location` == 'Greeley, CO' ~ 'North America_United States_Midwest_Greeley, CO',
+ `JRS-Geographical Location` == 'Ithaca, NE' ~ 'North America_United States_Midwest_Ithaca, NE',
+ `JRS-Geographical Location` == 'Platte Co., NE' ~ 'North America_United States_Midwest_Platte Co., NE',
+ `JRS-Geographical Location` == 'Tekamah, Burt Co., NE' ~ 'North America_United States_Midwest_Tekamah, NE',
+ `JRS-Geographical Location` == 'Saunders Co., NE' ~ 'North America_United States_Midwest_Saunders Co., NE',
+ `JRS-Geographical Location` == 'Bellwood, NE' ~ 'North America_United States_Midwest_Bellwood, NE',
+ `JRS-Geographical Location` == 'Herman, NE' ~ 'North America_United States_Midwest_Herman, NE',
+ `JRS-Geographical Location` == 'Ord, NE' ~ 'North America_United States_Midwest_Ord, NE',
+ `JRS-Geographical Location` == 'UNL PN, Lincoln, NE' ~ 'North America_United States_Midwest_Lincoln, NE',
+ `JRS-Geographical Location` == 'Mead, Nebraska' ~ 'North America_United States_Midwest_Mead, NE',
+ `JRS-Geographical Location` == 'Ewing, NE' ~ 'North America_United States_Midwest_Ewing, NE',
+ `JRS-Geographical Location` == 'Auburn, NE' ~ 'North America_United States_Midwest_Auburn, NE',
+ `JRS-Geographical Location` == 'Argentina' ~ 'South America_Argentina_Argentina_Argentina',
+ `JRS-Geographical Location` == 'Rio Verde/GO, Brazil' ~ 'South America_Brazil_Goiás_Rio Verde',
+ `JRS-Geographical Location` == 'Campo Mourão/PR, Brazil' ~ 'South America_Brazil_Paraná_Campo Mourão',
+ `JRS-Geographical Location` == 'São Miguel do Passo Quatro/GO' ~ 'South America_Brazil_Goiás_São Miguel do Passo',
+ `JRS-Geographical Location` == 'Pinhão/PR, Brazil' ~ 'South America_Brazil_Paraná_Pinhão', # Note: in Anthony's data, one isolate labeled as South America_Brazil_Goiás
+ `JRS-Geographical Location` == 'Formoso, GO, Brazil' ~ 'South America_Brazil_Goiás_Formoso',
+ `JRS-Geographical Location` == 'Guarapuava, PR, Brazil' ~ 'South America_Brazil_Paraná_Guarapuava',
+ `JRS-Geographical Location` == 'Luiz Eduardo Magalhães/BA, Brazil' ~ 'South America_Brazil_Bahia_Bahia',
+ `JRS-Geographical Location` == 'Mauá da Serra/PR, Brazil' ~ 'South America_Brazil_Paraná_Mauá da Serra',
+ `JRS-Geographical Location` == 'Nᾶo me Toque/Rio Grande do Sul, Brazil' ~ 'South America_Brazil_Rio Grande do Sul_Não me Toque',
+ `JRS-Geographical Location` == 'Sᾶo Desidério/Bahia, Brazil' ~ 'South America_Brazil_Bahia_São Desidério',
+ `JRS-Geographical Location` == 'Jataí/GO, Brazil' ~ 'South America_Brazil_Goiás_Jataí',
+ `JRS-Geographical Location` == 'Cristalina/GO, Brazil' ~ 'South America_Brazil_Goiás_Cristalina',
+ `JRS-Geographical Location` == 'Formosa/GO, Brazil' ~ 'South America_Brazil_Goiás_Formosa',
+ `JRS-Geographical Location` == 'Sudeste/GO, Brazil' ~ 'South America_Brazil_Goiás_Sudeste',
+ `JRS-Geographical Location` == 'Uberlândia/MG, Brazil' ~ 'South America_Brazil_Minas Gerais_Uberlândia',
+ `JRS-Geographical Location` == 'Correntina/BA, Brazil' ~ 'South America_Brazil_Bahia_Correntina',
+ `JRS-Geographical Location` == 'Bahia, Brazil' ~ 'South America_Brazil_Bahia_Bahia',
+ `JRS-Geographical Location` == 'Vacaria/RS, Brazil' ~ 'South America_Brazil_Rio Grande do Sul_Vacaria',
+ `JRS-Geographical Location` == 'Coxilha/RS, Brazil' ~ 'South America_Brazil_Rio Grande do Sul_Coxilha',
+ `JRS-Geographical Location` == 'Faxinal/PR, Brazil' ~ 'South America_Brazil_Paraná_Faxinal',
+ `JRS-Geographical Location` == 'Chapadão do Sul/MS, Brazil' ~ 'South America_Brazil_Mato Grosso do Sul_Chapadão do Sul',
+ TRUE ~ `JRS-Geographical Location`
+ ))
>
>
> # Saving Cleaned Data as CSV -----------------------------------------------
> #
> # Now that the data are cleaned, I will save the important bits for reproduction
> # as both a CSV and a genclone object.
>
> clean_data <- full_data %>%
+ select(GenoID = `AP-GenoID`,
+ MCG,
+ Year = date,
+ continent_country_state_region,
+ matches("\\d-\\d")) %>%
+ mutate(Year = year(Year)) %>%
+ filter(!is.na(continent_country_state_region)) %>% # remove isolate that has no info
+ separate(continent_country_state_region,
+ c("Continent", "Country", "Population", "Subpop"),
+ sep = "_") %>%
+ write_csv(path = here::here("data/clean-genotypes.csv"))
>
> # Head off any encoding issues
> readLines(here::here("data/clean-genotypes.csv")) %>%
+ iconv(from = "UTF-8", to = "ISO-8859-1") %>%
+ writeLines(con = here::here("data/clean-genotypes.csv"))
>
> print(clean_data, n = 100)
[38;5;246m# A tibble: 94 x 18[39m
GenoID MCG Year Continent Country Population Subpop `5-2` `6-2` `7-2` `8-3` `9-2` `12-2` `20-3` `55-4` `110-4` `114-4` `17-3`
[3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m
[38;5;250m 1[39m 143 A [4m1[24m977. North America United States Midwest Greeley, CO 340 501 184 264 371 228 292 207 397 351 360
[38;5;250m 2[39m 202 K [4m1[24m990. North America United States Midwest Ithaca, NE 340 501 184 264 371 228 292 207 397 351 360
[38;5;250m 3[39m 264 K [4m1[24m994. North America United States Midwest Platte Co., NE 340 501 184 264 371 228 292 207 397 351 360
[38;5;250m 4[39m 265 H [4m1[24m994. North America United States Midwest Tekamah, NE 340 501 184 264 371 228 292 207 397 351 360
[38;5;250m 5[39m 266 A [4m1[24m994. North America United States Midwest Saunders Co., NE 340 501 184 264 371 228 292 207 397 351 360
[38;5;250m 6[39m 267 I [4m1[24m994. North America United States Midwest Bellwood, NE 340 [31mNA[39m 184 256 371 228 292 201 397 351 360
[38;5;250m 7[39m 268 J [4m1[24m994. North America United States Midwest Herman, NE 324 501 184 264 371 230 294 183 397 351 360
[38;5;250m 8[39m 276 A [4m1[24m996. North America United States Midwest Ord, NE 332 501 184 264 371 228 292 207 397 367 354
[38;5;250m 9[39m 289 K [4m1[24m996. North America United States Midwest Lincoln, NE 332 501 [31mNA[39m 264 388 228 292 207 389 367 354
[38;5;250m10[39m 293A C [4m1[24m996. South America Argentina Argentina Argentina [31mNA[39m 495 182 268 388 228 292 207 381 367 360
[38;5;250m11[39m 293B D [4m1[24m996. South America Argentina Argentina Argentina 330 505 182 264 388 228 285 207 381 351 360
[38;5;250m12[39m 293C E [4m1[24m996. South America Argentina Argentina Argentina 340 495 182 264 371 230 292 207 385 351 360
[38;5;250m13[39m 293D F [4m1[24m996. South America Argentina Argentina Argentina 340 505 174 264 388 226 294 207 385 351 360
[38;5;250m14[39m 293E G [4m1[24m996. South America Argentina Argentina Argentina [31mNA[39m 495 182 280 [31mNA[39m 226 287 270 381 367 360
[38;5;250m15[39m 399A B [4m1[24m999. North America United States Midwest Mead, NE 332 495 182 262 371 230 292 203 397 367 354
[38;5;250m16[39m 399B B [4m1[24m999. North America United States Midwest Mead, NE 332 501 182 262 371 228 292 [31mNA[39m 397 367 354
[38;5;250m17[39m 541A M [4m2[24m012. North America United States Midwest Ewing, NE 340 495 184 264 371 230 294 258 381 351 360
[38;5;250m18[39m 541B J [4m2[24m012. North America United States Midwest Ewing, NE 332 501 182 264 371 234 294 207 389 351 354
[38;5;250m19[39m 834A L [4m2[24m009. North America United States Midwest Auburn, NE 334 495 184 262 371 234 292 207 385 351 360
[38;5;250m20[39m 834B L [4m2[24m009. North America United States Midwest Auburn, NE 330 [31mNA[39m 184 262 360 228 292 183 389 367 360
[38;5;250m21[39m 978E2 [31mNA[39m [4m2[24m012. South America Brazil Goiás Rio Verde 332 505 174 264 [31mNA[39m 230 292 207 385 351 360
[38;5;250m22[39m 1010 R [4m2[24m014. South America Brazil Paraná Campo Mourão 340 505 172 264 360 230 292 223 [31mNA[39m 351 360
[38;5;250m23[39m 1011 R [4m2[24m014. South America Brazil Paraná Campo Mourão 332 505 174 264 371 230 292 207 385 367 360
[38;5;250m24[39m 1012 Q [4m2[24m014. South America Brazil Paraná Campo Mourão 334 505 174 268 371 228 292 207 381 351 360
[38;5;250m25[39m 1013 Q [4m2[24m014. South America Brazil Paraná Campo Mourão 332 505 182 268 375 228 292 207 381 351 360
[38;5;250m26[39m 1014 Q [4m2[24m014. South America Brazil Paraná Campo Mourão 332 495 182 268 388 228 292 207 381 367 360
[38;5;250m27[39m 1015 N [4m2[24m012. South America Brazil Goiás São Miguel do Passo 332 505 174 264 371 228 292 207 385 351 354
[38;5;250m28[39m 1016 N [4m2[24m012. South America Brazil Paraná Pinhão 332 505 174 264 370 230 292 183 385 351 354
[38;5;250m29[39m 1017 N [4m2[24m012. South America Brazil Goiás Formoso 332 505 174 264 388 230 292 207 [31mNA[39m 351 360
[38;5;250m30[39m 1018 R [4m2[24m012. South America Brazil Paraná Guarapuava 332 505 174 264 371 230 [31mNA[39m 207 385 351 360
[38;5;250m31[39m 1019 N [4m2[24m012. South America Brazil Bahia Bahia 332 505 174 264 371 230 292 207 [31mNA[39m 351 360
[38;5;250m32[39m 1020 Y [4m2[24m012. South America Brazil Paraná Pinhão 332 505 182 264 371 234 294 219 385 351 354
[38;5;250m33[39m 1021 Z [4m2[24m012. South America Brazil Paraná Mauá da Serra 340 505 182 280 371 230 292 207 381 351 354
[38;5;250m34[39m 1022 N [4m2[24m012. South America Brazil Rio Grande do Sul Não me Toque 332 505 174 264 371 230 283 207 385 351 360
[38;5;250m35[39m 1023 N [4m2[24m012. South America Brazil Bahia São Desidério 332 505 174 264 371 230 292 207 385 367 360
[38;5;250m36[39m 1 [31mNA[39m [4m2[24m010. South America Brazil Goiás Jataí 336 505 [31mNA[39m 256 371 228 283 207 381 367 354
[38;5;250m37[39m 3 [31mNA[39m [4m2[24m010. South America Brazil Goiás Jataí 332 505 172 256 371 228 [31mNA[39m 207 381 351 360
[38;5;250m38[39m 5 [31mNA[39m [4m2[24m010. South America Brazil Goiás Cristalina 332 495 186 264 388 234 283 207 381 367 354
[38;5;250m39[39m 6 [31mNA[39m [4m2[24m010. South America Brazil Goiás Formosa 332 505 172 256 388 228 335 207 385 351 354
[38;5;250m40[39m 7 [31mNA[39m [4m2[24m010. South America Brazil Goiás Formosa 332 505 174 264 371 230 283 207 385 367 354
[38;5;250m41[39m 8 [31mNA[39m [4m2[24m010. South America Brazil Goiás Sudeste 332 505 174 264 388 230 292 207 385 351 360
[38;5;250m42[39m 9 [31mNA[39m [4m2[24m010. South America Brazil Goiás Sudeste 326 505 174 264 371 230 [31mNA[39m 207 385 367 360
[38;5;250m43[39m 12 [31mNA[39m [4m2[24m010. South America Brazil Goiás Sudeste 340 505 174 264 371 230 283 207 385 367 354
[38;5;250m44[39m 13 [31mNA[39m [4m2[24m010. South America Brazil Goiás Sudeste 326 505 172 256 388 228 283 207 385 367 354
[38;5;250m45[39m 14 [31mNA[39m [4m2[24m010. South America Brazil Goiás Sudeste 332 505 174 264 371 230 283 207 385 367 354
[38;5;250m46[39m 15 [31mNA[39m [4m2[24m009. South America Brazil Minas Gerais Uberlândia 332 505 172 256 388 228 [31mNA[39m 207 381 367 354
[38;5;250m47[39m 16 [31mNA[39m [4m2[24m009. South America Brazil Minas Gerais Uberlândia 332 505 174 264 388 230 285 207 385 367 354
[38;5;250m48[39m 17 [31mNA[39m [4m2[24m009. South America Brazil Minas Gerais Uberlândia 330 505 174 264 388 228 [31mNA[39m 207 385 367 354
[38;5;250m49[39m 18 [31mNA[39m [4m2[24m009. South America Brazil Minas Gerais Uberlândia 330 505 174 264 388 230 283 207 385 367 354
[38;5;250m50[39m 20 [31mNA[39m [4m2[24m009. South America Brazil Minas Gerais Uberlândia 330 505 174 264 388 230 283 207 385 367 354
[38;5;250m51[39m 22 [31mNA[39m [4m2[24m009. South America Brazil Minas Gerais Uberlândia 332 505 174 264 388 230 283 207 385 367 354
[38;5;250m52[39m 24 [31mNA[39m [4m2[24m009. South America Brazil Minas Gerais Uberlândia 332 505 174 264 371 230 283 207 385 367 354
[38;5;250m53[39m 25 [31mNA[39m [4m2[24m010. South America Brazil Bahia Correntina 332 505 174 264 388 230 283 207 385 367 354
[38;5;250m54[39m 26 [31mNA[39m [4m2[24m010. South America Brazil Bahia Correntina 332 505 174 264 371 230 [31mNA[39m 207 385 367 354
[38;5;250m55[39m 27 [31mNA[39m [4m2[24m010. South America Brazil Bahia Correntina 332 [31mNA[39m [31mNA[39m 264 388 234 283 207 381 367 354
[38;5;250m56[39m 29 [31mNA[39m [4m2[24m010. South America Brazil Bahia Correntina 332 505 174 264 388 230 283 207 385 367 360
[38;5;250m57[39m 31 [31mNA[39m [4m2[24m010. South America Brazil Bahia Correntina 332 505 174 264 388 230 289 207 385 367 354
[38;5;250m58[39m 32 [31mNA[39m [4m2[24m010. South America Brazil Bahia Correntina 332 505 174 264 370 230 293 207 385 367 354
[38;5;250m59[39m 973A N [4m2[24m012. South America Brazil Bahia Bahia [31mNA[39m 505 174 264 371 240 292 207 [31mNA[39m 367 360
[38;5;250m60[39m 973B P [4m2[24m012. South America Brazil Bahia Bahia 340 495 186 264 371 234 292 191 381 351 354
[38;5;250m61[39m 973C P [4m2[24m012. South America Brazil Bahia Bahia 340 495 176 264 371 234 292 191 381 367 360
[38;5;250m62[39m 973D P [4m2[24m012. South America Brazil Bahia Bahia 332 495 186 264 388 234 292 207 381 367 354
[38;5;250m63[39m 973E U [4m2[24m012. South America Brazil Bahia Bahia 340 505 172 256 371 228 292 [31mNA[39m 381 367 354
[38;5;250m64[39m 974A O [4m2[24m012. South America Brazil Rio Grande do Sul Vacaria 332 505 172 256 371 228 294 207 [31mNA[39m 367 354
[38;5;250m65[39m 974B O [4m2[24m012. South America Brazil Rio Grande do Sul Vacaria 340 495 182 280 388 230 294 207 397 351 354
[38;5;250m66[39m 974C V [4m2[24m012. South America Brazil Rio Grande do Sul Vacaria 340 495 182 280 360 234 294 209 397 367 354
[38;5;250m67[39m 974D O [4m2[24m012. South America Brazil Rio Grande do Sul Vacaria 332 525 182 280 371 226 292 [31mNA[39m [31mNA[39m 367 354
[38;5;250m68[39m 974E O [4m2[24m012. South America Brazil Rio Grande do Sul Vacaria 340 525 182 280 366 226 294 [31mNA[39m 397 367 354
[38;5;250m69[39m 975A Q [4m2[24m012. South America Brazil Rio Grande do Sul Coxilha 332 505 182 268 388 228 292 183 381 367 354
[38;5;250m70[39m 975B R [4m2[24m012. South America Brazil Rio Grande do Sul Coxilha [31mNA[39m 505 174 264 388 230 292 207 385 351 354
[38;5;250m71[39m 975C R [4m2[24m012. South America Brazil Rio Grande do Sul Coxilha 332 505 174 264 371 228 292 207 385 351 354
[38;5;250m72[39m 975D W [4m2[24m012. South America Brazil Rio Grande do Sul Coxilha 332 505 174 264 371 230 283 [31mNA[39m 385 367 354
[38;5;250m73[39m 975E Q [4m2[24m012. South America Brazil Rio Grande do Sul Coxilha 332 505 182 268 388 230 292 203 381 351 354
[38;5;250m74[39m 976A S [4m2[24m012. South America Brazil Paraná Faxinal 334 495 [31mNA[39m 266 388 234 292 207 381 351 354
[38;5;250m75[39m 976B T [4m2[24m012. South America Brazil Paraná Faxinal 332 505 172 256 388 228 292 [31mNA[39m 385 351 354
[38;5;250m76[39m 976B2 [31mNA[39m [4m2[24m012. South America Brazil Paraná Faxinal 332 505 172 256 388 228 292 195 385 367 360
[38;5;250m77[39m 976C S [4m2[24m012. South America Brazil Paraná Faxinal [31mNA[39m 495 182 266 388 234 292 207 381 351 360
[38;5;250m78[39m 976D T [4m2[24m012. South America Brazil Paraná Faxinal 332 505 174 256 371 228 294 195 385 367 354
[38;5;250m79[39m 976E T [4m2[24m012. South America Brazil Paraná Faxinal 340 505 172 254 371 228 292 [31mNA[39m 385 351 354
[38;5;250m80[39m 977A T [4m2[24m012. South America Brazil Mato Grosso do Sul Chapadão do Sul 328 505 182 280 388 226 292 207 397 367 354
[38;5;250m81[39m 977B U [4m2[24m012. South America Brazil Mato Grosso do Sul Chapadão do Sul 332 505 172 256 388 228 292 207 381 351 354
[38;5;250m82[39m 977C I [4m2[24m012. South America Brazil Mato Grosso do Sul Chapadão do Sul 332 505 172 256 [31mNA[39m 228 292 207 385 367 360
[38;5;250m83[39m 977D I [4m2[24m012. South America Brazil Mato Grosso do Sul Chapadão do Sul 332 505 172 256 371 228 296 [31mNA[39m 385 367 360
[38;5;250m84[39m 977E N [4m2[24m012. South America Brazil Mato Grosso do Sul Chapadão do Sul 332 505 174 264 [31mNA[39m 234 292 207 385 367 360
[38;5;250m85[39m 978A N [4m2[24m012. South America Brazil Goiás Rio Verde 332 505 182 264 371 230 292 207 [31mNA[39m 367 354
[38;5;250m86[39m 978B N [4m2[24m012. South America Brazil Goiás Rio Verde 332 505 174 264 371 234 292 191 385 367 354
[38;5;250m87[39m 978C N [4m2[24m012. South America Brazil Goiás Rio Verde 332 525 174 264 371 230 292 207 [31mNA[39m 367 354
[38;5;250m88[39m 978D N [4m2[24m012. South America Brazil Goiás Rio Verde 332 505 174 264 371 230 284 207 385 367 354
[38;5;250m89[39m 978E N [4m2[24m012. South America Brazil Goiás Rio Verde 332 505 174 264 [31mNA[39m 230 292 207 385 367 354
[38;5;250m90[39m 979A O [4m2[24m012. South America Brazil Rio Grande do Sul Vacaria 330 525 182 280 371 226 294 207 385 367 360
[38;5;250m91[39m 979B N [4m2[24m012. South America Brazil Rio Grande do Sul Vacaria [31mNA[39m 505 174 264 [31mNA[39m 230 292 207 385 367 360
[38;5;250m92[39m 979C O [4m2[24m012. South America Brazil Rio Grande do Sul Vacaria 330 525 182 280 388 226 294 207 385 367 360
[38;5;250m93[39m 979D N [4m2[24m012. South America Brazil Rio Grande do Sul Vacaria 332 505 174 264 371 230 292 207 385 367 360
[38;5;250m94[39m 979E N [4m2[24m012. South America Brazil Rio Grande do Sul Vacaria 332 505 174 264 371 230 295 207 385 367 360
>
>
> # Converting to genclone, adding Repeat Lengths, Palette ------------------
> gid <- df2genind(select(clean_data, matches("\\d-\\d")),
+ ploidy = 1,
+ ind.names = clean_data$GenoID,
+ strata = select(clean_data,
+ GenoID, Continent, Country, Population, Subpop, MCG, Year)) %>%
+ as.genclone() %>%
+ setPop(~Population)
>
> # This is a color-blind friendly palette
> other(gid)$palette <- c("Midwest" = "#000000",
+ "Argentina" = "#F0E442", # "#E69F00",
+ "Bahia" = "#56B4E9",
+ "Goiás" = "#009E73",
+ "Mato Grosso do Sul" = "#E69F00",
+ "Minas Gerais" = "#0072B2",
+ "Paraná" = "#D55E00",
+ "Rio Grande do Sul" = "#CC79A7")
>
> # These are the repeat lengths that we are correcting to avoid rounding errors
> (other(gid)$REPLEN <- fix_replen(gid, c(2, 6, 2, 2, 2, 2, 4, 4, 4, 4, 3)))
Warning in fix_replen(gid, c(2, 6, 2, 2, 2, 2, 4, 4, 4, 4, 3)) :
The repeat lengths for 20-3, 55-4 are not consistent.
This might be due to inconsistent allele calls or repeat lengths that are too large.
Check the alleles to make sure there are no duplicated or similar alleles that might end up being the same after division.
Repeat lengths with some modification are being returned: 6-2
5-2 6-2 7-2 8-3 9-2 12-2 20-3 55-4 110-4 114-4 17-3
2.00000 6.00001 2.00000 2.00000 2.00000 2.00000 4.00000 4.00000 4.00000 4.00000 3.00000
>
> write_rds(gid, path = here::here("data/full-genclone-object.rds"))
>
> # Session Information -----------------------------------------------------
>
>
> sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
setting value
version R version 3.4.4 (2018-03-15)
os macOS High Sierra 10.13.4
system x86_64, darwin15.6.0
ui X11
language (EN)
collate en_US.UTF-8
tz America/Chicago
date 2018-04-13
─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
package * version date source
ade4 * 1.7-11 2018-04-05 CRAN (R 3.4.4)
adegenet * 2.1.1 2018-02-02 CRAN (R 3.4.3)
ansistrings 1.0.0.9000 2018-04-04 Github (r-lib/ansistrings@4e4d309)
ape 5.1 2018-04-04 CRAN (R 3.4.4)
assertthat 0.2.0 2017-04-11 CRAN (R 3.4.0)
backports 1.1.2 2017-12-13 CRAN (R 3.4.3)
bindr 0.1.1 2018-03-13 CRAN (R 3.4.4)
bindrcpp * 0.2.2 2018-03-29 CRAN (R 3.4.4)
boot 1.3-20 2017-07-30 CRAN (R 3.4.1)
broom 0.4.4 2018-03-29 CRAN (R 3.4.4)
cellranger 1.1.0 2016-07-27 CRAN (R 3.4.0)
cli 1.0.0.9002 2018-04-04 Github (r-lib/cli@c408924)
clisymbols 1.2.0 2017-05-21 cran (@1.2.0)
cluster 2.0.7-1 2018-04-09 CRAN (R 3.4.4)
coda 0.19-1 2016-12-08 CRAN (R 3.4.0)
colorspace 1.4-0 2017-11-23 R-Forge (R 3.4.2)
crayon 1.3.4 2018-04-04 Github (gaborcsardi/crayon@95b3eae)
deldir 0.1-15 2018-04-01 CRAN (R 3.4.4)
digest 0.6.15 2018-01-28 cran (@0.6.15)
dplyr * 0.7.4 2017-09-28 CRAN (R 3.4.1)
expm 0.999-2 2017-03-29 CRAN (R 3.4.0)
fastmatch 1.1-0 2017-01-28 CRAN (R 3.4.0)
forcats * 0.3.0 2018-02-19 CRAN (R 3.4.3)
foreign 0.8-69 2017-06-21 CRAN (R 3.4.0)
gdata 2.18.0 2017-06-06 CRAN (R 3.4.0)
ggplot2 * 2.2.1 2016-12-30 CRAN (R 3.4.0)
glue 1.2.0 2017-10-29 CRAN (R 3.4.2)
gmodels 2.16.2 2015-07-22 CRAN (R 3.4.0)
gtable 0.2.0 2016-02-26 CRAN (R 3.4.0)
gtools 3.5.0 2015-05-29 CRAN (R 3.4.0)
haven 1.1.1 2018-01-18 CRAN (R 3.4.3)
here 0.1 2017-05-28 CRAN (R 3.4.0)
hms 0.4.2 2018-03-10 CRAN (R 3.4.4)
htmltools 0.3.6 2017-04-28 CRAN (R 3.4.0)
httpuv 1.3.6.2 2018-03-02 CRAN (R 3.4.3)
httr 1.3.1 2017-08-20 cran (@1.3.1)
igraph 1.2.1 2018-03-10 CRAN (R 3.4.4)
jsonlite 1.5 2017-06-01 CRAN (R 3.4.0)
lattice 0.20-35 2017-03-25 CRAN (R 3.4.0)
lazyeval 0.2.1 2017-10-29 CRAN (R 3.4.2)
LearnBayes 2.15.1 2018-03-18 CRAN (R 3.4.4)
lubridate * 1.7.3 2018-02-27 CRAN (R 3.4.3)
magrittr 1.5 2014-11-22 CRAN (R 3.4.0)
MASS 7.3-49 2018-02-23 CRAN (R 3.4.3)
Matrix 1.2-13 2018-04-02 CRAN (R 3.4.4)
mgcv 1.8-23 2018-01-15 CRAN (R 3.4.3)
mime 0.5 2016-07-07 CRAN (R 3.4.0)
mnormt 1.5-5 2016-10-15 CRAN (R 3.4.0)
modelr 0.1.1 2017-07-24 CRAN (R 3.4.1)
munsell 0.4.3 2016-02-13 CRAN (R 3.4.0)
nlme 3.1-137 2018-04-07 CRAN (R 3.4.4)
pegas 0.10-4 2018-03-15 local
permute 0.9-4 2016-09-09 CRAN (R 3.4.0)
phangorn 2.4.0 2018-02-15 CRAN (R 3.4.3)
pillar 1.2.1 2018-02-27 CRAN (R 3.4.3)
pkgconfig 2.0.1 2017-03-21 CRAN (R 3.4.0)
plyr 1.8.4 2016-06-08 CRAN (R 3.4.0)
polysat 1.7-2 2017-08-17 CRAN (R 3.4.1)
poppr * 2.7.1 2018-03-16 CRAN (R 3.4.4)
prettyunits 1.0.2 2015-07-13 CRAN (R 3.4.0)
progress 1.1.2.9003 2018-04-04 Github (r-lib/progress@e525de4)
psych 1.8.3.3 2018-03-30 CRAN (R 3.4.4)
purrr * 0.2.4 2017-10-18 cran (@0.2.4)
quadprog 1.5-5 2013-04-17 CRAN (R 3.4.0)
R6 2.2.2 2017-06-17 cran (@2.2.2)
Rcpp 0.12.16 2018-03-13 CRAN (R 3.4.4)
readr * 1.1.1 2017-05-16 CRAN (R 3.4.0)
readxl * 1.0.0 2017-04-18 CRAN (R 3.4.0)
reshape2 1.4.3 2017-12-11 CRAN (R 3.4.3)
rlang 0.2.0.9001 2018-04-04 Github (r-lib/rlang@49d7a34)
rprojroot 1.3-2 2018-01-03 CRAN (R 3.4.3)
rstudioapi 0.7.0-9000 2018-04-04 Github (rstudio/rstudioapi@e87b481)
rvest 0.3.2 2016-06-17 CRAN (R 3.4.0)
scales 0.5.0.9000 2018-04-09 Github (hadley/scales@d767915)
selectr 0.4-1 2018-04-06 CRAN (R 3.4.4)
seqinr 3.4-5 2017-08-01 CRAN (R 3.4.1)
sessioninfo 1.0.0 2017-06-21 CRAN (R 3.4.1)
shiny 1.0.5 2017-08-23 cran (@1.0.5)
sp 1.2-7 2018-01-19 cran (@1.2-7)
spData 0.2.8.3 2018-03-25 CRAN (R 3.4.4)
spdep 0.7-7 2018-04-03 CRAN (R 3.4.4)
stringi 1.1.7 2018-03-12 CRAN (R 3.4.4)
stringr * 1.3.0 2018-02-19 cran (@1.3.0)
tibble * 1.4.2 2018-01-22 cran (@1.4.2)
tidyr * 0.8.0 2018-01-29 CRAN (R 3.4.3)
tidyselect 0.2.4 2018-02-26 CRAN (R 3.4.3)
tidyverse * 1.2.1 2017-11-14 CRAN (R 3.4.2)
utf8 1.1.3 2018-01-03 CRAN (R 3.4.3)
vegan 2.4-6 2018-01-24 cran (@2.4-6)
withr 2.1.2 2018-04-04 Github (r-lib/withr@79d7b0d)
xml2 1.2.0 2018-01-24 cran (@1.2.0)
xtable 1.8-2 2016-02-05 CRAN (R 3.4.0)
> options(encoding = enc)
>