-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy path09_data_preparation.jl
More file actions
126 lines (85 loc) · 2.47 KB
/
09_data_preparation.jl
File metadata and controls
126 lines (85 loc) · 2.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# # Preparing data for prediction
using SpeciesDistributionToolkit
using CairoMakie
#
# Bounding box of the study area. `bottom`/`top` are reused below as the latitude
# range of the occurrence query, and `left`/`right` as its longitude range.
spatial_extent = (; left = 5.0, bottom = 57.5, right = 10.0, top = 62.7)
#
# Look up the taxon with `strict = false`, then describe the occurrence search:
# records marked as present, with coordinates, inside the bounding box, and with
# `"limit"` set to 300. After the first request, `occurrences!` is called three
# more times on the same result; presumably each call appends the next page of
# records (TODO confirm against the occurrence client's documentation).
rangifer = taxon("Rangifer tarandus tarandus"; strict = false)
latitude_range = (spatial_extent.bottom, spatial_extent.top)
longitude_range = (spatial_extent.left, spatial_extent.right)
query = [
    "occurrenceStatus" => "PRESENT",
    "hasCoordinate" => true,
    "decimalLatitude" => latitude_range,
    "decimalLongitude" => longitude_range,
    "limit" => 300,
]
presences = occurrences(rangifer, query...)
foreach(_ -> occurrences!(presences), 1:3)
#
# Pick the raster source and fetch the descriptions of the layers it offers; the
# keys of `varnames` are used as layer identifiers and its values as layer names
# further down.
dataprovider = RasterData(CHELSA1, BioClim)
varnames = layerdescriptions(dataprovider)
#
# Load every layer, cropped to the bounding box, and convert each one to a
# `SimpleSDMResponse`. The multiplication by `1.0` presumably promotes the cell
# values to floating point before the conversion (TODO confirm).
layers = map(collect(keys(varnames))) do lname
    predictor = SimpleSDMPredictor(dataprovider; spatial_extent..., layer = lname)
    convert(SimpleSDMResponse, 1.0 * predictor)
end
#
# Keep an untouched copy: cells of `layers` are set to `nothing` later on, while
# the final prediction needs the complete rasters.
originallayers = deepcopy(layers)
#
# Build a Boolean presence layer on the grid of the first predictor, then draw
# 250 cells from the pseudo-absence mask (the draw is not seeded here, so it may
# differ between runs). `false` cells are turned into `nothing` in both layers,
# the sampled pseudo-absences are written back into `presenceonly` as `false`,
# and finally every predictor cell whose key is absent from `presenceonly` is set
# to `nothing`, so that predictors and response cover the same cells.
presenceonly = mask(first(layers), presences, Bool)
envelope = pseudoabsencemask(SurfaceRangeEnvelope, presenceonly)
absenceonly = SpeciesDistributionToolkit.sample(envelope, 250)
foreach(layer -> replace!(layer, false => nothing), (presenceonly, absenceonly))
for absence in absenceonly
    presenceonly[absence.longitude, absence.latitude] = false
end
for layer in layers
    for unused in setdiff(keys(layer), keys(presenceonly))
        layer[unused] = nothing
    end
end
layers
#
# Assemble two stacks that share the same layer names: `datastack` holds the
# masked predictors plus the presence layer (named "Presence") and is used for
# training; `predictionstack` holds the untouched predictors and is used for the
# final prediction. Each layer is wrapped in a `Ref` before being stacked.
layernames = [values(varnames)...]
refs = Ref.([layers..., presenceonly])
datastack = SimpleSDMStack(vcat(layernames, "Presence"), refs)
predictionstack = SimpleSDMStack(layernames, Ref.(originallayers))
#
# The stack can be viewed as a table.
using DataFrames
DataFrame(datastack)
#
using MLJ
#
# Drop the coordinate columns from the training table, then split it into the
# target (`Presence`) and the features (everything else). The target is coerced
# to `Continuous`, so the model below treats presence as a number to regress on.
trainingtable = select(DataFrame(datastack), Not([:longitude, :latitude]))
y, X = unpack(trainingtable, colname -> colname == :Presence);
y = coerce(y, Continuous)
#
# Load the two model types and chain them: standardise the features, then fit a
# linear regression.
Standardizer = @load Standardizer pkg = MLJModels add = true verbosity = 0
LM = @load LinearRegressor pkg = MLJLinearModels add = true verbosity = 0
model = Standardizer() |> LM()
#
# Bind the pipeline to the data and train it.
mach = fit!(machine(model, X, y))
#
# Evaluate the fitted machine with a shuffled 3-fold cross-validation. The
# generator is seeded so that the folds are the same on every run. `Xoshiro`
# lives in the `Random` standard library and is imported here explicitly, since
# none of the other `using` statements in this script is guaranteed to bring it
# into scope.
#
# NOTE(review): the four measures are classification measures, while the target
# was coerced to `Continuous` and the model is a regressor -- confirm that the
# installed MLJ version accepts this combination, or threshold the predictions
# first.
using Random: Xoshiro
perf_measures = [mcc, f1score, accuracy, balanced_accuracy]
evaluate!(
    mach;
    resampling = CV(; nfolds = 3, shuffle = true, rng = Xoshiro(234)),
    measure = perf_measures,
)
#
# Predict over the full, unmasked extent. The prediction stack is converted to a
# table once and reused for both the features and the coordinates, so the
# predicted values are attached to rows of the very same table they were
# computed from (and the conversion is not paid for twice).
predictiontable = DataFrame(predictionstack);
value = predict(mach, select(predictiontable, Not([:longitude, :latitude])));
#
# `select` returns a new table, so adding the `value` column leaves
# `predictiontable` untouched.
prediction = select(predictiontable, [:longitude, :latitude]);
prediction.value = value;
#
# Turn the (longitude, latitude, value) table back into a layer.
# NOTE(review): `Tables` is not imported by name in this script -- presumably one
# of the loaded packages makes it available; confirm.
output = Tables.materializer(SimpleSDMResponse)(prediction)
#
# Draw the predicted layer as a heatmap, overlay the occurrence records as
# points, and return the figure so that it is displayed.
heatmap_arguments = sprinkle(output)
heatmap(heatmap_arguments...; colormap = :viridis)
presence_longitudes = longitudes(presences)
presence_latitudes = latitudes(presences)
scatter!(presence_longitudes, presence_latitudes)
current_figure()