ggmlR provides dp_train() for data-parallel training
across multiple GPUs (or CPU cores). Each replica processes a different
sample per step; the per-replica gradients are averaged and a single
optimizer update is applied.
dp_train() takes a model factory
(make_model) instead of a model instance. It creates
n_gpu identical replicas, synchronises their initial
weights, and runs a gradient-accumulation loop:
for each iteration:
    each replica → forward(sample_i) → loss → backward
    average gradients across replicas
    optimizer step on replica 0
    broadcast updated weights to all replicas
The effective batch size equals n_gpu (one sample per
replica per step).
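The averaged update is equivalent to one ordinary gradient step on a mini-batch of n_gpu samples. The following is a minimal sketch of that update rule in base R, using a toy one-parameter least-squares model rather than ggmlR objects (the names here are illustrative, not part of the package API):

# Toy data-parallel step: three "replicas" share the weight w and each
# sees one sample of y ≈ w * x.
w  <- 0.5                          # shared weight (synchronised across replicas)
x  <- c(1.0, 2.0, -1.5)            # one sample per replica (n_gpu = 3)
y  <- c(2.0, 4.1, -2.9)
lr <- 1e-1

grads <- vapply(seq_along(x), function(i) {
  pred <- w * x[i]                 # replica i: forward(sample_i)
  2 * (pred - y[i]) * x[i]         # replica i: backward, d loss_i / d w
}, numeric(1))

g_avg <- mean(grads)               # average gradients across replicas
w <- w - lr * g_avg                # optimizer step on "replica 0"
w                                  # broadcast: every replica now uses this w

The iris example below does the same thing with real ggmlR replicas.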
data(iris)
set.seed(42)
x_cm <- t(scale(as.matrix(iris[, 1:4]))) # [4, 150]
y_oh <- t(model.matrix(~ Species - 1, iris)) # [3, 150]
# Dataset as list of (x, y) pairs — one sample each
dp_data <- lapply(seq_len(ncol(x_cm)), function(i)
  list(x = x_cm[, i, drop = FALSE],
       y = y_oh[, i, drop = FALSE]))
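Each element of dp_data is therefore a single column-wise (x, y) pair; a quick sanity check with base R:

length(dp_data)        # 150 samples
dim(dp_data[[1]]$x)    # 4 1  (one feature column)
dim(dp_data[[1]]$y)    # 3 1  (one one-hot target column)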
# Model factory — called once per replica
make_model <- function() {
  ag_sequential(
    ag_linear(4L, 32L, activation = "relu"),
    ag_linear(32L, 3L)
  )
}
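Because dp_train() builds one replica per call to the factory, the factory should construct a fresh network each time. Wrapping a prebuilt model, as sketched below, would likely leave every replica pointing at the same parameter tensors (an illustrative anti-pattern, not package API):

shared <- ag_sequential(
  ag_linear(4L, 32L, activation = "relu"),
  ag_linear(32L, 3L)
)
bad_factory <- function() shared   # returns the same object on every call
# Prefer make_model() above: each call creates and initialises new layers.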
result <- dp_train(
  make_model = make_model,
  data = dp_data,
  loss_fn = function(out, tgt) ag_softmax_cross_entropy_loss(out, tgt),
  forward_fn = function(model, s) model$forward(ag_tensor(s$x)),
  target_fn = function(s) s$y,
  n_gpu = 1L,  # set to ggml_vulkan_device_count() for multi-GPU
  n_iter = 2000L,
  lr = 1e-3,
  verbose = TRUE
)
## [dp_train] iter 1 / 2000 loss = 1.114629
## [dp_train] iter 2 / 2000 loss = 1.138763
## [dp_train] iter 3 / 2000 loss = 1.035583
## [dp_train] iter 4 / 2000 loss = 1.014455
## [dp_train] iter 5 / 2000 loss = 1.003183
## [dp_train] iter 6 / 2000 loss = 0.995121
## [dp_train] iter 7 / 2000 loss = 0.918157
## [dp_train] iter 8 / 2000 loss = 0.964508
## [dp_train] iter 9 / 2000 loss = 0.973214
## [dp_train] iter 10 / 2000 loss = 0.945529
## ...
## [dp_train] iter 1282 / 2000 loss = 0.093929
## [dp_train] iter 1283 / 2000 loss = 0.195568
## [dp_train] iter 1284 / 2000 loss = 0.648453
## [dp_train] iter 1285 / 2000 loss = 0.459652
## [dp_train] iter 1286 / 2000 loss = 0.677454
## [dp_train] iter 1287 / 2000 loss = 0.580450
## [dp_train] iter 1288 / 2000 loss = 0.193334
## [dp_train] iter 1289 / 2000 loss = 0.349203
## [dp_train] iter 1290 / 2000 loss = 0.133783
## [dp_train] iter 1291 / 2000 loss = 0.149472
## [dp_train] iter 1292 / 2000 loss = 0.429936
## [dp_train] iter 1293 / 2000 loss = 0.143798
## [dp_train] iter 1294 / 2000 loss = 0.067761
## [dp_train] iter 1295 / 2000 loss = 0.186447
## [dp_train] iter 1296 / 2000 loss = 0.303203
## [dp_train] iter 1297 / 2000 loss = 0.261623
## [dp_train] iter 1298 / 2000 loss = 0.275015
## [dp_train] iter 1299 / 2000 loss = 0.153908
## [dp_train] iter 1300 / 2000 loss = 0.206767
## [dp_train] iter 1301 / 2000 loss = 0.092541
## [dp_train] iter 1302 / 2000 loss = 0.641727
## [dp_train] iter 1303 / 2000 loss = 0.220477
## [dp_train] iter 1304 / 2000 loss = 0.463661
## [dp_train] iter 1305 / 2000 loss = 0.168620
## [dp_train] iter 1306 / 2000 loss = 0.178329
## [dp_train] iter 1307 / 2000 loss = 1.788406
## [dp_train] iter 1308 / 2000 loss = 0.368858
## [dp_train] iter 1309 / 2000 loss = 0.479070
## [dp_train] iter 1310 / 2000 loss = 0.088983
## [dp_train] iter 1311 / 2000 loss = 0.310624
## [dp_train] iter 1312 / 2000 loss = 0.385688
## [dp_train] iter 1313 / 2000 loss = 0.192395
## [dp_train] iter 1314 / 2000 loss = 0.605531
## [dp_train] iter 1315 / 2000 loss = 0.156496
## [dp_train] iter 1316 / 2000 loss = 0.125622
## [dp_train] iter 1317 / 2000 loss = 0.348149
## [dp_train] iter 1318 / 2000 loss = 0.104757
## [dp_train] iter 1319 / 2000 loss = 0.044825
## [dp_train] iter 1320 / 2000 loss = 1.370312
## [dp_train] iter 1321 / 2000 loss = 0.074090
## [dp_train] iter 1322 / 2000 loss = 0.391325
## [dp_train] iter 1323 / 2000 loss = 0.085281
## [dp_train] iter 1324 / 2000 loss = 0.388327
## [dp_train] iter 1325 / 2000 loss = 0.114778
## [dp_train] iter 1326 / 2000 loss = 0.206449
## [dp_train] iter 1327 / 2000 loss = 0.367561
## [dp_train] iter 1328 / 2000 loss = 0.337473
## [dp_train] iter 1329 / 2000 loss = 0.106522
## [dp_train] iter 1330 / 2000 loss = 0.304581
## [dp_train] iter 1331 / 2000 loss = 0.095148
## [dp_train] iter 1332 / 2000 loss = 0.096146
## [dp_train] iter 1333 / 2000 loss = 0.067171
## [dp_train] iter 1334 / 2000 loss = 0.542262
## [dp_train] iter 1335 / 2000 loss = 0.775568
## [dp_train] iter 1336 / 2000 loss = 0.020051
## [dp_train] iter 1337 / 2000 loss = 0.044063
## [dp_train] iter 1338 / 2000 loss = 0.178308
## [dp_train] iter 1339 / 2000 loss = 0.280760
## [dp_train] iter 1340 / 2000 loss = 0.058433
## [dp_train] iter 1341 / 2000 loss = 0.021892
## [dp_train] iter 1342 / 2000 loss = 0.034188
## [dp_train] iter 1343 / 2000 loss = 0.242286
## [dp_train] iter 1344 / 2000 loss = 0.022395
## [dp_train] iter 1345 / 2000 loss = 0.016352
## [dp_train] iter 1346 / 2000 loss = 0.028732
## [dp_train] iter 1347 / 2000 loss = 0.151831
## [dp_train] iter 1348 / 2000 loss = 0.077043
## [dp_train] iter 1349 / 2000 loss = 0.048487
## [dp_train] iter 1350 / 2000 loss = 0.225871
## [dp_train] iter 1351 / 2000 loss = 0.002492
## [dp_train] iter 1352 / 2000 loss = 0.017229
## [dp_train] iter 1353 / 2000 loss = 0.004082
## [dp_train] iter 1354 / 2000 loss = 0.006843
## [dp_train] iter 1355 / 2000 loss = 0.001398
## [dp_train] iter 1356 / 2000 loss = 0.001645
## [dp_train] iter 1357 / 2000 loss = 0.001870
## [dp_train] iter 1358 / 2000 loss = 0.003568
## [dp_train] iter 1359 / 2000 loss = 0.013228
## [dp_train] iter 1360 / 2000 loss = 0.009488
## [dp_train] iter 1361 / 2000 loss = 0.001961
## [dp_train] iter 1362 / 2000 loss = 0.002846
## [dp_train] iter 1363 / 2000 loss = 0.012308
## [dp_train] iter 1364 / 2000 loss = 0.003924
## [dp_train] iter 1365 / 2000 loss = 0.000919
## [dp_train] iter 1366 / 2000 loss = 0.000470
## [dp_train] iter 1367 / 2000 loss = 0.001028
## [dp_train] iter 1368 / 2000 loss = 0.002616
## [dp_train] iter 1369 / 2000 loss = 0.003048
## [dp_train] iter 1370 / 2000 loss = 0.000927
## [dp_train] iter 1371 / 2000 loss = 0.008018
## [dp_train] iter 1372 / 2000 loss = 0.001484
## [dp_train] iter 1373 / 2000 loss = 0.000421
## [dp_train] iter 1374 / 2000 loss = 0.010317
## [dp_train] iter 1375 / 2000 loss = 0.003737
## [dp_train] iter 1376 / 2000 loss = 0.022426
## [dp_train] iter 1377 / 2000 loss = 0.004542
## [dp_train] iter 1378 / 2000 loss = 0.002922
## [dp_train] iter 1379 / 2000 loss = 0.003912
## [dp_train] iter 1380 / 2000 loss = 0.004983
## [dp_train] iter 1381 / 2000 loss = 0.009069
## [dp_train] iter 1382 / 2000 loss = 0.007486
## [dp_train] iter 1383 / 2000 loss = 0.000269
## [dp_train] iter 1384 / 2000 loss = 0.000324
## [dp_train] iter 1385 / 2000 loss = 0.009241
## [dp_train] iter 1386 / 2000 loss = 0.004921
## [dp_train] iter 1387 / 2000 loss = 0.003529
## [dp_train] iter 1388 / 2000 loss = 0.000859
## [dp_train] iter 1389 / 2000 loss = 0.005508
## [dp_train] iter 1390 / 2000 loss = 0.003444
## [dp_train] iter 1391 / 2000 loss = 0.001695
## [dp_train] iter 1392 / 2000 loss = 0.310101
## [dp_train] iter 1393 / 2000 loss = 0.001862
## [dp_train] iter 1394 / 2000 loss = 0.003306
## [dp_train] iter 1395 / 2000 loss = 0.001385
## [dp_train] iter 1396 / 2000 loss = 0.011337
## [dp_train] iter 1397 / 2000 loss = 0.000674
## [dp_train] iter 1398 / 2000 loss = 0.002450
## [dp_train] iter 1399 / 2000 loss = 0.001137
## [dp_train] iter 1400 / 2000 loss = 0.003045
## [dp_train] iter 1401 / 2000 loss = 0.947462
## [dp_train] iter 1402 / 2000 loss = 0.932593
## [dp_train] iter 1403 / 2000 loss = 1.259748
## [dp_train] iter 1404 / 2000 loss = 0.183148
## [dp_train] iter 1405 / 2000 loss = 1.076447
## [dp_train] iter 1406 / 2000 loss = 0.379405
## [dp_train] iter 1407 / 2000 loss = 1.090179
## [dp_train] iter 1408 / 2000 loss = 0.142388
## [dp_train] iter 1409 / 2000 loss = 0.624281
## [dp_train] iter 1410 / 2000 loss = 0.272475
## [dp_train] iter 1411 / 2000 loss = 0.044069
## [dp_train] iter 1412 / 2000 loss = 0.655339
## [dp_train] iter 1413 / 2000 loss = 0.096503
## [dp_train] iter 1414 / 2000 loss = 0.670905
## [dp_train] iter 1415 / 2000 loss = 0.327904
## [dp_train] iter 1416 / 2000 loss = 0.591509
## [dp_train] iter 1417 / 2000 loss = 0.595595
## [dp_train] iter 1418 / 2000 loss = 0.170562
## [dp_train] iter 1419 / 2000 loss = 0.517174
## [dp_train] iter 1420 / 2000 loss = 0.123726
## [dp_train] iter 1421 / 2000 loss = 1.288748
## [dp_train] iter 1422 / 2000 loss = 0.280923
## [dp_train] iter 1423 / 2000 loss = 0.709584
## [dp_train] iter 1424 / 2000 loss = 0.316101
## [dp_train] iter 1425 / 2000 loss = 0.378974
## [dp_train] iter 1426 / 2000 loss = 0.493566
## [dp_train] iter 1427 / 2000 loss = 0.602622
## [dp_train] iter 1428 / 2000 loss = 1.147923
## [dp_train] iter 1429 / 2000 loss = 0.555534
## [dp_train] iter 1430 / 2000 loss = 0.116515
## [dp_train] iter 1431 / 2000 loss = 0.080421
## [dp_train] iter 1432 / 2000 loss = 0.070299
## [dp_train] iter 1433 / 2000 loss = 0.155530
## [dp_train] iter 1434 / 2000 loss = 0.671665
## [dp_train] iter 1435 / 2000 loss = 0.436088
## [dp_train] iter 1436 / 2000 loss = 0.625544
## [dp_train] iter 1437 / 2000 loss = 0.507756
## [dp_train] iter 1438 / 2000 loss = 0.160672
## [dp_train] iter 1439 / 2000 loss = 0.299719
## [dp_train] iter 1440 / 2000 loss = 0.110252
## [dp_train] iter 1441 / 2000 loss = 0.125254
## [dp_train] iter 1442 / 2000 loss = 0.376221
## [dp_train] iter 1443 / 2000 loss = 0.113199
## [dp_train] iter 1444 / 2000 loss = 0.052680
## [dp_train] iter 1445 / 2000 loss = 0.154317
## [dp_train] iter 1446 / 2000 loss = 0.255974
## [dp_train] iter 1447 / 2000 loss = 0.220829
## [dp_train] iter 1448 / 2000 loss = 0.222443
## [dp_train] iter 1449 / 2000 loss = 0.129445
## [dp_train] iter 1450 / 2000 loss = 0.171479
## [dp_train] iter 1451 / 2000 loss = 0.071582
## [dp_train] iter 1452 / 2000 loss = 0.568425
## [dp_train] iter 1453 / 2000 loss = 0.201330
## [dp_train] iter 1454 / 2000 loss = 0.425832
## [dp_train] iter 1455 / 2000 loss = 0.140193
## [dp_train] iter 1456 / 2000 loss = 0.163156
## [dp_train] iter 1457 / 2000 loss = 1.736798
## [dp_train] iter 1458 / 2000 loss = 0.362844
## [dp_train] iter 1459 / 2000 loss = 0.433612
## [dp_train] iter 1460 / 2000 loss = 0.076714
## [dp_train] iter 1461 / 2000 loss = 0.297999
## [dp_train] iter 1462 / 2000 loss = 0.347931
## [dp_train] iter 1463 / 2000 loss = 0.174421
## [dp_train] iter 1464 / 2000 loss = 0.511693
## [dp_train] iter 1465 / 2000 loss = 0.117778
## [dp_train] iter 1466 / 2000 loss = 0.107124
## [dp_train] iter 1467 / 2000 loss = 0.335972
## [dp_train] iter 1468 / 2000 loss = 0.100085
## [dp_train] iter 1469 / 2000 loss = 0.035199
## [dp_train] iter 1470 / 2000 loss = 1.355096
## [dp_train] iter 1471 / 2000 loss = 0.063302
## [dp_train] iter 1472 / 2000 loss = 0.334766
## [dp_train] iter 1473 / 2000 loss = 0.076918
## [dp_train] iter 1474 / 2000 loss = 0.367875
## [dp_train] iter 1475 / 2000 loss = 0.103892
## [dp_train] iter 1476 / 2000 loss = 0.209852
## [dp_train] iter 1477 / 2000 loss = 0.350259
## [dp_train] iter 1478 / 2000 loss = 0.318430
## [dp_train] iter 1479 / 2000 loss = 0.084864
## [dp_train] iter 1480 / 2000 loss = 0.325044
## [dp_train] iter 1481 / 2000 loss = 0.089501
## [dp_train] iter 1482 / 2000 loss = 0.101597
## [dp_train] iter 1483 / 2000 loss = 0.051556
## [dp_train] iter 1484 / 2000 loss = 0.548111
## [dp_train] iter 1485 / 2000 loss = 0.759302
## [dp_train] iter 1486 / 2000 loss = 0.017187
## [dp_train] iter 1487 / 2000 loss = 0.035202
## [dp_train] iter 1488 / 2000 loss = 0.166957
## [dp_train] iter 1489 / 2000 loss = 0.261892
## [dp_train] iter 1490 / 2000 loss = 0.052568
## [dp_train] iter 1491 / 2000 loss = 0.017093
## [dp_train] iter 1492 / 2000 loss = 0.029411
## [dp_train] iter 1493 / 2000 loss = 0.203766
## [dp_train] iter 1494 / 2000 loss = 0.017964
## [dp_train] iter 1495 / 2000 loss = 0.012673
## [dp_train] iter 1496 / 2000 loss = 0.023618
## [dp_train] iter 1497 / 2000 loss = 0.129561
## [dp_train] iter 1498 / 2000 loss = 0.068490
## [dp_train] iter 1499 / 2000 loss = 0.040082
## [dp_train] iter 1500 / 2000 loss = 0.201309
## [dp_train] iter 1501 / 2000 loss = 0.001903
## [dp_train] iter 1502 / 2000 loss = 0.013781
## [dp_train] iter 1503 / 2000 loss = 0.003030
## [dp_train] iter 1504 / 2000 loss = 0.005168
## [dp_train] iter 1505 / 2000 loss = 0.001037
## [dp_train] iter 1506 / 2000 loss = 0.001214
## [dp_train] iter 1507 / 2000 loss = 0.001353
## [dp_train] iter 1508 / 2000 loss = 0.002738
## [dp_train] iter 1509 / 2000 loss = 0.010033
## [dp_train] iter 1510 / 2000 loss = 0.007421
## [dp_train] iter 1511 / 2000 loss = 0.001512
## [dp_train] iter 1512 / 2000 loss = 0.002134
## [dp_train] iter 1513 / 2000 loss = 0.009707
## [dp_train] iter 1514 / 2000 loss = 0.002829
## [dp_train] iter 1515 / 2000 loss = 0.000702
## [dp_train] iter 1516 / 2000 loss = 0.000348
## [dp_train] iter 1517 / 2000 loss = 0.000736
## [dp_train] iter 1518 / 2000 loss = 0.002028
## [dp_train] iter 1519 / 2000 loss = 0.002425
## [dp_train] iter 1520 / 2000 loss = 0.000674
## [dp_train] iter 1521 / 2000 loss = 0.006657
## [dp_train] iter 1522 / 2000 loss = 0.001102
## [dp_train] iter 1523 / 2000 loss = 0.000292
## [dp_train] iter 1524 / 2000 loss = 0.008433
## [dp_train] iter 1525 / 2000 loss = 0.002878
## [dp_train] iter 1526 / 2000 loss = 0.018727
## [dp_train] iter 1527 / 2000 loss = 0.003577
## [dp_train] iter 1528 / 2000 loss = 0.002300
## [dp_train] iter 1529 / 2000 loss = 0.003118
## [dp_train] iter 1530 / 2000 loss = 0.003843
## [dp_train] iter 1531 / 2000 loss = 0.007233
## [dp_train] iter 1532 / 2000 loss = 0.006226
## [dp_train] iter 1533 / 2000 loss = 0.000187
## [dp_train] iter 1534 / 2000 loss = 0.000230
## [dp_train] iter 1535 / 2000 loss = 0.007464
## [dp_train] iter 1536 / 2000 loss = 0.003883
## [dp_train] iter 1537 / 2000 loss = 0.002892
## [dp_train] iter 1538 / 2000 loss = 0.000641
## [dp_train] iter 1539 / 2000 loss = 0.004181
## [dp_train] iter 1540 / 2000 loss = 0.002735
## [dp_train] iter 1541 / 2000 loss = 0.001304
## [dp_train] iter 1542 / 2000 loss = 0.274384
## [dp_train] iter 1543 / 2000 loss = 0.001366
## [dp_train] iter 1544 / 2000 loss = 0.002566
## [dp_train] iter 1545 / 2000 loss = 0.001019
## [dp_train] iter 1546 / 2000 loss = 0.009293
## [dp_train] iter 1547 / 2000 loss = 0.000496
## [dp_train] iter 1548 / 2000 loss = 0.001855
## [dp_train] iter 1549 / 2000 loss = 0.000874
## [dp_train] iter 1550 / 2000 loss = 0.002399
## [dp_train] iter 1551 / 2000 loss = 0.808442
## [dp_train] iter 1552 / 2000 loss = 0.842856
## [dp_train] iter 1553 / 2000 loss = 1.150162
## [dp_train] iter 1554 / 2000 loss = 0.161718
## [dp_train] iter 1555 / 2000 loss = 1.015188
## [dp_train] iter 1556 / 2000 loss = 0.343701
## [dp_train] iter 1557 / 2000 loss = 1.023224
## [dp_train] iter 1558 / 2000 loss = 0.118462
## [dp_train] iter 1559 / 2000 loss = 0.537612
## [dp_train] iter 1560 / 2000 loss = 0.243317
## [dp_train] iter 1561 / 2000 loss = 0.034507
## [dp_train] iter 1562 / 2000 loss = 0.597028
## [dp_train] iter 1563 / 2000 loss = 0.071904
## [dp_train] iter 1564 / 2000 loss = 0.619339
## [dp_train] iter 1565 / 2000 loss = 0.278381
## [dp_train] iter 1566 / 2000 loss = 0.497195
## [dp_train] iter 1567 / 2000 loss = 0.569697
## [dp_train] iter 1568 / 2000 loss = 0.134771
## [dp_train] iter 1569 / 2000 loss = 0.488918
## [dp_train] iter 1570 / 2000 loss = 0.097080
## [dp_train] iter 1571 / 2000 loss = 1.309279
## [dp_train] iter 1572 / 2000 loss = 0.224859
## [dp_train] iter 1573 / 2000 loss = 0.698853
## [dp_train] iter 1574 / 2000 loss = 0.264163
## [dp_train] iter 1575 / 2000 loss = 0.307348
## [dp_train] iter 1576 / 2000 loss = 0.416752
## [dp_train] iter 1577 / 2000 loss = 0.529870
## [dp_train] iter 1578 / 2000 loss = 1.104746
## [dp_train] iter 1579 / 2000 loss = 0.514982
## [dp_train] iter 1580 / 2000 loss = 0.091154
## [dp_train] iter 1581 / 2000 loss = 0.062228
## [dp_train] iter 1582 / 2000 loss = 0.053523
## [dp_train] iter 1583 / 2000 loss = 0.124532
## [dp_train] iter 1584 / 2000 loss = 0.696386
## [dp_train] iter 1585 / 2000 loss = 0.415856
## [dp_train] iter 1586 / 2000 loss = 0.569086
## [dp_train] iter 1587 / 2000 loss = 0.435621
## [dp_train] iter 1588 / 2000 loss = 0.135057
## [dp_train] iter 1589 / 2000 loss = 0.256930
## [dp_train] iter 1590 / 2000 loss = 0.092052
## [dp_train] iter 1591 / 2000 loss = 0.104552
## [dp_train] iter 1592 / 2000 loss = 0.321960
## [dp_train] iter 1593 / 2000 loss = 0.089525
## [dp_train] iter 1594 / 2000 loss = 0.041572
## [dp_train] iter 1595 / 2000 loss = 0.128898
## [dp_train] iter 1596 / 2000 loss = 0.210467
## [dp_train] iter 1597 / 2000 loss = 0.186721
## [dp_train] iter 1598 / 2000 loss = 0.176219
## [dp_train] iter 1599 / 2000 loss = 0.110870
## [dp_train] iter 1600 / 2000 loss = 0.142650
## [dp_train] iter 1601 / 2000 loss = 0.056899
## [dp_train] iter 1602 / 2000 loss = 0.504786
## [dp_train] iter 1603 / 2000 loss = 0.189354
## [dp_train] iter 1604 / 2000 loss = 0.402251
## [dp_train] iter 1605 / 2000 loss = 0.119498
## [dp_train] iter 1606 / 2000 loss = 0.151912
## [dp_train] iter 1607 / 2000 loss = 1.656074
## [dp_train] iter 1608 / 2000 loss = 0.361932
## [dp_train] iter 1609 / 2000 loss = 0.389155
## [dp_train] iter 1610 / 2000 loss = 0.067472
## [dp_train] iter 1611 / 2000 loss = 0.292145
## [dp_train] iter 1612 / 2000 loss = 0.311915
## [dp_train] iter 1613 / 2000 loss = 0.160745
## [dp_train] iter 1614 / 2000 loss = 0.420390
## [dp_train] iter 1615 / 2000 loss = 0.088907
## [dp_train] iter 1616 / 2000 loss = 0.093232
## [dp_train] iter 1617 / 2000 loss = 0.329387
## [dp_train] iter 1618 / 2000 loss = 0.096419
## [dp_train] iter 1619 / 2000 loss = 0.027893
## [dp_train] iter 1620 / 2000 loss = 1.336109
## [dp_train] iter 1621 / 2000 loss = 0.054877
## [dp_train] iter 1622 / 2000 loss = 0.281729
## [dp_train] iter 1623 / 2000 loss = 0.069742
## [dp_train] iter 1624 / 2000 loss = 0.345838
## [dp_train] iter 1625 / 2000 loss = 0.095914
## [dp_train] iter 1626 / 2000 loss = 0.215779
## [dp_train] iter 1627 / 2000 loss = 0.330882
## [dp_train] iter 1628 / 2000 loss = 0.306461
## [dp_train] iter 1629 / 2000 loss = 0.067202
## [dp_train] iter 1630 / 2000 loss = 0.347550
## [dp_train] iter 1631 / 2000 loss = 0.084098
## [dp_train] iter 1632 / 2000 loss = 0.107994
## [dp_train] iter 1633 / 2000 loss = 0.038807
## [dp_train] iter 1634 / 2000 loss = 0.561056
## [dp_train] iter 1635 / 2000 loss = 0.731221
## [dp_train] iter 1636 / 2000 loss = 0.014429
## [dp_train] iter 1637 / 2000 loss = 0.028512
## [dp_train] iter 1638 / 2000 loss = 0.157910
## [dp_train] iter 1639 / 2000 loss = 0.244449
## [dp_train] iter 1640 / 2000 loss = 0.047259
## [dp_train] iter 1641 / 2000 loss = 0.013332
## [dp_train] iter 1642 / 2000 loss = 0.025158
## [dp_train] iter 1643 / 2000 loss = 0.169009
## [dp_train] iter 1644 / 2000 loss = 0.014384
## [dp_train] iter 1645 / 2000 loss = 0.009800
## [dp_train] iter 1646 / 2000 loss = 0.019277
## [dp_train] iter 1647 / 2000 loss = 0.110118
## [dp_train] iter 1648 / 2000 loss = 0.060816
## [dp_train] iter 1649 / 2000 loss = 0.033470
## [dp_train] iter 1650 / 2000 loss = 0.178090
## [dp_train] iter 1651 / 2000 loss = 0.001500
## [dp_train] iter 1652 / 2000 loss = 0.011256
## [dp_train] iter 1653 / 2000 loss = 0.002323
## [dp_train] iter 1654 / 2000 loss = 0.004005
## [dp_train] iter 1655 / 2000 loss = 0.000799
## [dp_train] iter 1656 / 2000 loss = 0.000953
## [dp_train] iter 1657 / 2000 loss = 0.001013
## [dp_train] iter 1658 / 2000 loss = 0.002170
## [dp_train] iter 1659 / 2000 loss = 0.007765
## [dp_train] iter 1660 / 2000 loss = 0.005984
## [dp_train] iter 1661 / 2000 loss = 0.001231
## [dp_train] iter 1662 / 2000 loss = 0.001657
## [dp_train] iter 1663 / 2000 loss = 0.007799
## [dp_train] iter 1664 / 2000 loss = 0.002104
## [dp_train] iter 1665 / 2000 loss = 0.000556
## [dp_train] iter 1666 / 2000 loss = 0.000265
## [dp_train] iter 1667 / 2000 loss = 0.000566
## [dp_train] iter 1668 / 2000 loss = 0.001626
## [dp_train] iter 1669 / 2000 loss = 0.001995
## [dp_train] iter 1670 / 2000 loss = 0.000506
## [dp_train] iter 1671 / 2000 loss = 0.005682
## [dp_train] iter 1672 / 2000 loss = 0.000853
## [dp_train] iter 1673 / 2000 loss = 0.000212
## [dp_train] iter 1674 / 2000 loss = 0.007037
## [dp_train] iter 1675 / 2000 loss = 0.002284
## [dp_train] iter 1676 / 2000 loss = 0.015786
## [dp_train] iter 1677 / 2000 loss = 0.002894
## [dp_train] iter 1678 / 2000 loss = 0.001876
## [dp_train] iter 1679 / 2000 loss = 0.002556
## [dp_train] iter 1680 / 2000 loss = 0.003023
## [dp_train] iter 1681 / 2000 loss = 0.005870
## [dp_train] iter 1682 / 2000 loss = 0.005330
## [dp_train] iter 1683 / 2000 loss = 0.000138
## [dp_train] iter 1684 / 2000 loss = 0.000170
## [dp_train] iter 1685 / 2000 loss = 0.006136
## [dp_train] iter 1686 / 2000 loss = 0.003126
## [dp_train] iter 1687 / 2000 loss = 0.002452
## [dp_train] iter 1688 / 2000 loss = 0.000495
## [dp_train] iter 1689 / 2000 loss = 0.003234
## [dp_train] iter 1690 / 2000 loss = 0.002230
## [dp_train] iter 1691 / 2000 loss = 0.001036
## [dp_train] iter 1692 / 2000 loss = 0.240044
## [dp_train] iter 1693 / 2000 loss = 0.001020
## [dp_train] iter 1694 / 2000 loss = 0.002090
## [dp_train] iter 1695 / 2000 loss = 0.000793
## [dp_train] iter 1696 / 2000 loss = 0.007686
## [dp_train] iter 1697 / 2000 loss = 0.000377
## [dp_train] iter 1698 / 2000 loss = 0.001439
## [dp_train] iter 1699 / 2000 loss = 0.000717
## [dp_train] iter 1700 / 2000 loss = 0.001952
## [dp_train] iter 1701 / 2000 loss = 0.684182
## [dp_train] iter 1702 / 2000 loss = 0.756466
## [dp_train] iter 1703 / 2000 loss = 1.050347
## [dp_train] iter 1704 / 2000 loss = 0.143314
## [dp_train] iter 1705 / 2000 loss = 0.959080
## [dp_train] iter 1706 / 2000 loss = 0.308248
## [dp_train] iter 1707 / 2000 loss = 0.955375
## [dp_train] iter 1708 / 2000 loss = 0.100252
## [dp_train] iter 1709 / 2000 loss = 0.463481
## [dp_train] iter 1710 / 2000 loss = 0.217408
## [dp_train] iter 1711 / 2000 loss = 0.027053
## [dp_train] iter 1712 / 2000 loss = 0.538596
## [dp_train] iter 1713 / 2000 loss = 0.055245
## [dp_train] iter 1714 / 2000 loss = 0.567611
## [dp_train] iter 1715 / 2000 loss = 0.237611
## [dp_train] iter 1716 / 2000 loss = 0.417470
## [dp_train] iter 1717 / 2000 loss = 0.545913
## [dp_train] iter 1718 / 2000 loss = 0.108152
## [dp_train] iter 1719 / 2000 loss = 0.479511
## [dp_train] iter 1720 / 2000 loss = 0.077855
## [dp_train] iter 1721 / 2000 loss = 1.325551
## [dp_train] iter 1722 / 2000 loss = 0.181890
## [dp_train] iter 1723 / 2000 loss = 0.704467
## [dp_train] iter 1724 / 2000 loss = 0.220907
## [dp_train] iter 1725 / 2000 loss = 0.249891
## [dp_train] iter 1726 / 2000 loss = 0.354519
## [dp_train] iter 1727 / 2000 loss = 0.475160
## [dp_train] iter 1728 / 2000 loss = 1.081788
## [dp_train] iter 1729 / 2000 loss = 0.476329
## [dp_train] iter 1730 / 2000 loss = 0.072628
## [dp_train] iter 1731 / 2000 loss = 0.049586
## [dp_train] iter 1732 / 2000 loss = 0.041938
## [dp_train] iter 1733 / 2000 loss = 0.101343
## [dp_train] iter 1734 / 2000 loss = 0.725028
## [dp_train] iter 1735 / 2000 loss = 0.393680
## [dp_train] iter 1736 / 2000 loss = 0.516543
## [dp_train] iter 1737 / 2000 loss = 0.380957
## [dp_train] iter 1738 / 2000 loss = 0.118238
## [dp_train] iter 1739 / 2000 loss = 0.220818
## [dp_train] iter 1740 / 2000 loss = 0.078909
## [dp_train] iter 1741 / 2000 loss = 0.089082
## [dp_train] iter 1742 / 2000 loss = 0.278865
## [dp_train] iter 1743 / 2000 loss = 0.072706
## [dp_train] iter 1744 / 2000 loss = 0.034120
## [dp_train] iter 1745 / 2000 loss = 0.110051
## [dp_train] iter 1746 / 2000 loss = 0.174406
## [dp_train] iter 1747 / 2000 loss = 0.159408
## [dp_train] iter 1748 / 2000 loss = 0.141850
## [dp_train] iter 1749 / 2000 loss = 0.096701
## [dp_train] iter 1750 / 2000 loss = 0.120544
## [dp_train] iter 1751 / 2000 loss = 0.043171
## [dp_train] iter 1752 / 2000 loss = 0.442328
## [dp_train] iter 1753 / 2000 loss = 0.166751
## [dp_train] iter 1754 / 2000 loss = 0.368827
## [dp_train] iter 1755 / 2000 loss = 0.096827
## [dp_train] iter 1756 / 2000 loss = 0.131553
## [dp_train] iter 1757 / 2000 loss = 1.569437
## [dp_train] iter 1758 / 2000 loss = 0.343349
## [dp_train] iter 1759 / 2000 loss = 0.342514
## [dp_train] iter 1760 / 2000 loss = 0.057816
## [dp_train] iter 1761 / 2000 loss = 0.280710
## [dp_train] iter 1762 / 2000 loss = 0.275950
## [dp_train] iter 1763 / 2000 loss = 0.142531
## [dp_train] iter 1764 / 2000 loss = 0.349580
## [dp_train] iter 1765 / 2000 loss = 0.066017
## [dp_train] iter 1766 / 2000 loss = 0.079470
## [dp_train] iter 1767 / 2000 loss = 0.314455
## [dp_train] iter 1768 / 2000 loss = 0.092538
## [dp_train] iter 1769 / 2000 loss = 0.020305
## [dp_train] iter 1770 / 2000 loss = 1.313738
## [dp_train] iter 1771 / 2000 loss = 0.046608
## [dp_train] iter 1772 / 2000 loss = 0.239427
## [dp_train] iter 1773 / 2000 loss = 0.059496
## [dp_train] iter 1774 / 2000 loss = 0.320783
## [dp_train] iter 1775 / 2000 loss = 0.087293
## [dp_train] iter 1776 / 2000 loss = 0.217352
## [dp_train] iter 1777 / 2000 loss = 0.313266
## [dp_train] iter 1778 / 2000 loss = 0.293293
## [dp_train] iter 1779 / 2000 loss = 0.053300
## [dp_train] iter 1780 / 2000 loss = 0.363268
## [dp_train] iter 1781 / 2000 loss = 0.075775
## [dp_train] iter 1782 / 2000 loss = 0.115625
## [dp_train] iter 1783 / 2000 loss = 0.029385
## [dp_train] iter 1784 / 2000 loss = 0.570528
## [dp_train] iter 1785 / 2000 loss = 0.701978
## [dp_train] iter 1786 / 2000 loss = 0.011872
## [dp_train] iter 1787 / 2000 loss = 0.023180
## [dp_train] iter 1788 / 2000 loss = 0.148292
## [dp_train] iter 1789 / 2000 loss = 0.229186
## [dp_train] iter 1790 / 2000 loss = 0.041901
## [dp_train] iter 1791 / 2000 loss = 0.010266
## [dp_train] iter 1792 / 2000 loss = 0.021431
## [dp_train] iter 1793 / 2000 loss = 0.139456
## [dp_train] iter 1794 / 2000 loss = 0.011495
## [dp_train] iter 1795 / 2000 loss = 0.007525
## [dp_train] iter 1796 / 2000 loss = 0.015551
## [dp_train] iter 1797 / 2000 loss = 0.092665
## [dp_train] iter 1798 / 2000 loss = 0.053311
## [dp_train] iter 1799 / 2000 loss = 0.028103
## [dp_train] iter 1800 / 2000 loss = 0.159364
## [dp_train] iter 1801 / 2000 loss = 0.001253
## [dp_train] iter 1802 / 2000 loss = 0.009496
## [dp_train] iter 1803 / 2000 loss = 0.001867
## [dp_train] iter 1804 / 2000 loss = 0.003238
## [dp_train] iter 1805 / 2000 loss = 0.000655
## [dp_train] iter 1806 / 2000 loss = 0.000791
## [dp_train] iter 1807 / 2000 loss = 0.000796
## [dp_train] iter 1808 / 2000 loss = 0.001804
## [dp_train] iter 1809 / 2000 loss = 0.006129
## [dp_train] iter 1810 / 2000 loss = 0.005027
## [dp_train] iter 1811 / 2000 loss = 0.001061
## [dp_train] iter 1812 / 2000 loss = 0.001346
## [dp_train] iter 1813 / 2000 loss = 0.006475
## [dp_train] iter 1814 / 2000 loss = 0.001619
## [dp_train] iter 1815 / 2000 loss = 0.000474
## [dp_train] iter 1816 / 2000 loss = 0.000215
## [dp_train] iter 1817 / 2000 loss = 0.000463
## [dp_train] iter 1818 / 2000 loss = 0.001361
## [dp_train] iter 1819 / 2000 loss = 0.001779
## [dp_train] iter 1820 / 2000 loss = 0.000407
## [dp_train] iter 1821 / 2000 loss = 0.005060
## [dp_train] iter 1822 / 2000 loss = 0.000713
## [dp_train] iter 1823 / 2000 loss = 0.000163
## [dp_train] iter 1824 / 2000 loss = 0.006148
## [dp_train] iter 1825 / 2000 loss = 0.001890
## [dp_train] iter 1826 / 2000 loss = 0.013805
## [dp_train] iter 1827 / 2000 loss = 0.002453
## [dp_train] iter 1828 / 2000 loss = 0.001604
## [dp_train] iter 1829 / 2000 loss = 0.002194
## [dp_train] iter 1830 / 2000 loss = 0.002469
## [dp_train] iter 1831 / 2000 loss = 0.004945
## [dp_train] iter 1832 / 2000 loss = 0.004751
## [dp_train] iter 1833 / 2000 loss = 0.000109
## [dp_train] iter 1834 / 2000 loss = 0.000141
## [dp_train] iter 1835 / 2000 loss = 0.005232
## [dp_train] iter 1836 / 2000 loss = 0.002638
## [dp_train] iter 1837 / 2000 loss = 0.002167
## [dp_train] iter 1838 / 2000 loss = 0.000401
## [dp_train] iter 1839 / 2000 loss = 0.002592
## [dp_train] iter 1840 / 2000 loss = 0.001897
## [dp_train] iter 1841 / 2000 loss = 0.000862
## [dp_train] iter 1842 / 2000 loss = 0.210683
## [dp_train] iter 1843 / 2000 loss = 0.000798
## [dp_train] iter 1844 / 2000 loss = 0.001766
## [dp_train] iter 1845 / 2000 loss = 0.000647
## [dp_train] iter 1846 / 2000 loss = 0.006542
## [dp_train] iter 1847 / 2000 loss = 0.000310
## [dp_train] iter 1848 / 2000 loss = 0.001157
## [dp_train] iter 1849 / 2000 loss = 0.000615
## [dp_train] iter 1850 / 2000 loss = 0.001645
## [dp_train] iter 1851 / 2000 loss = 0.573325
## [dp_train] iter 1852 / 2000 loss = 0.673328
## [dp_train] iter 1853 / 2000 loss = 0.956574
## [dp_train] iter 1854 / 2000 loss = 0.129361
## [dp_train] iter 1855 / 2000 loss = 0.917196
## [dp_train] iter 1856 / 2000 loss = 0.277286
## [dp_train] iter 1857 / 2000 loss = 0.885976
## [dp_train] iter 1858 / 2000 loss = 0.086381
## [dp_train] iter 1859 / 2000 loss = 0.401229
## [dp_train] iter 1860 / 2000 loss = 0.195990
## [dp_train] iter 1861 / 2000 loss = 0.021823
## [dp_train] iter 1862 / 2000 loss = 0.485869
## [dp_train] iter 1863 / 2000 loss = 0.043528
## [dp_train] iter 1864 / 2000 loss = 0.524518
## [dp_train] iter 1865 / 2000 loss = 0.202029
## [dp_train] iter 1866 / 2000 loss = 0.349423
## [dp_train] iter 1867 / 2000 loss = 0.520252
## [dp_train] iter 1868 / 2000 loss = 0.087377
## [dp_train] iter 1869 / 2000 loss = 0.473790
## [dp_train] iter 1870 / 2000 loss = 0.063426
## [dp_train] iter 1871 / 2000 loss = 1.342374
## [dp_train] iter 1872 / 2000 loss = 0.148921
## [dp_train] iter 1873 / 2000 loss = 0.710201
## [dp_train] iter 1874 / 2000 loss = 0.187376
## [dp_train] iter 1875 / 2000 loss = 0.202715
## [dp_train] iter 1876 / 2000 loss = 0.302960
## [dp_train] iter 1877 / 2000 loss = 0.431287
## [dp_train] iter 1878 / 2000 loss = 1.066495
## [dp_train] iter 1879 / 2000 loss = 0.442722
## [dp_train] iter 1880 / 2000 loss = 0.058685
## [dp_train] iter 1881 / 2000 loss = 0.040064
## [dp_train] iter 1882 / 2000 loss = 0.033394
## [dp_train] iter 1883 / 2000 loss = 0.082813
## [dp_train] iter 1884 / 2000 loss = 0.753114
## [dp_train] iter 1885 / 2000 loss = 0.371765
## [dp_train] iter 1886 / 2000 loss = 0.461874
## [dp_train] iter 1887 / 2000 loss = 0.334118
## [dp_train] iter 1888 / 2000 loss = 0.103775
## [dp_train] iter 1889 / 2000 loss = 0.187296
## [dp_train] iter 1890 / 2000 loss = 0.068338
## [dp_train] iter 1891 / 2000 loss = 0.076326
## [dp_train] iter 1892 / 2000 loss = 0.241973
## [dp_train] iter 1893 / 2000 loss = 0.059325
## [dp_train] iter 1894 / 2000 loss = 0.028498
## [dp_train] iter 1895 / 2000 loss = 0.094453
## [dp_train] iter 1896 / 2000 loss = 0.145127
## [dp_train] iter 1897 / 2000 loss = 0.134332
## [dp_train] iter 1898 / 2000 loss = 0.115060
## [dp_train] iter 1899 / 2000 loss = 0.085966
## [dp_train] iter 1900 / 2000 loss = 0.101845
## [dp_train] iter 1901 / 2000 loss = 0.032482
## [dp_train] iter 1902 / 2000 loss = 0.384662
## [dp_train] iter 1903 / 2000 loss = 0.143425
## [dp_train] iter 1904 / 2000 loss = 0.333326
## [dp_train] iter 1905 / 2000 loss = 0.076765
## [dp_train] iter 1906 / 2000 loss = 0.110660
## [dp_train] iter 1907 / 2000 loss = 1.486344
## [dp_train] iter 1908 / 2000 loss = 0.316990
## [dp_train] iter 1909 / 2000 loss = 0.292638
## [dp_train] iter 1910 / 2000 loss = 0.049627
## [dp_train] iter 1911 / 2000 loss = 0.268334
## [dp_train] iter 1912 / 2000 loss = 0.242077
## [dp_train] iter 1913 / 2000 loss = 0.125393
## [dp_train] iter 1914 / 2000 loss = 0.288069
## [dp_train] iter 1915 / 2000 loss = 0.049234
## [dp_train] iter 1916 / 2000 loss = 0.068002
## [dp_train] iter 1917 / 2000 loss = 0.299377
## [dp_train] iter 1918 / 2000 loss = 0.090146
## [dp_train] iter 1919 / 2000 loss = 0.014810
## [dp_train] iter 1920 / 2000 loss = 1.302593
## [dp_train] iter 1921 / 2000 loss = 0.040106
## [dp_train] iter 1922 / 2000 loss = 0.205164
## [dp_train] iter 1923 / 2000 loss = 0.050959
## [dp_train] iter 1924 / 2000 loss = 0.300347
## [dp_train] iter 1925 / 2000 loss = 0.080723
## [dp_train] iter 1926 / 2000 loss = 0.221066
## [dp_train] iter 1927 / 2000 loss = 0.298154
## [dp_train] iter 1928 / 2000 loss = 0.282997
## [dp_train] iter 1929 / 2000 loss = 0.043216
## [dp_train] iter 1930 / 2000 loss = 0.381749
## [dp_train] iter 1931 / 2000 loss = 0.068928
## [dp_train] iter 1932 / 2000 loss = 0.126407
## [dp_train] iter 1933 / 2000 loss = 0.022927
## [dp_train] iter 1934 / 2000 loss = 0.581742
## [dp_train] iter 1935 / 2000 loss = 0.674580
## [dp_train] iter 1936 / 2000 loss = 0.009957
## [dp_train] iter 1937 / 2000 loss = 0.019213
## [dp_train] iter 1938 / 2000 loss = 0.140909
## [dp_train] iter 1939 / 2000 loss = 0.216662
## [dp_train] iter 1940 / 2000 loss = 0.037879
## [dp_train] iter 1941 / 2000 loss = 0.008086
## [dp_train] iter 1942 / 2000 loss = 0.018593
## [dp_train] iter 1943 / 2000 loss = 0.117646
## [dp_train] iter 1944 / 2000 loss = 0.009378
## [dp_train] iter 1945 / 2000 loss = 0.005979
## [dp_train] iter 1946 / 2000 loss = 0.012810
## [dp_train] iter 1947 / 2000 loss = 0.078722
## [dp_train] iter 1948 / 2000 loss = 0.047617
## [dp_train] iter 1949 / 2000 loss = 0.023950
## [dp_train] iter 1950 / 2000 loss = 0.143449
## [dp_train] iter 1951 / 2000 loss = 0.001071
## [dp_train] iter 1952 / 2000 loss = 0.008132
## [dp_train] iter 1953 / 2000 loss = 0.001525
## [dp_train] iter 1954 / 2000 loss = 0.002647
## [dp_train] iter 1955 / 2000 loss = 0.000546
## [dp_train] iter 1956 / 2000 loss = 0.000672
## [dp_train] iter 1957 / 2000 loss = 0.000642
## [dp_train] iter 1958 / 2000 loss = 0.001533
## [dp_train] iter 1959 / 2000 loss = 0.004910
## [dp_train] iter 1960 / 2000 loss = 0.004259
## [dp_train] iter 1961 / 2000 loss = 0.000938
## [dp_train] iter 1962 / 2000 loss = 0.001124
## [dp_train] iter 1963 / 2000 loss = 0.005455
## [dp_train] iter 1964 / 2000 loss = 0.001260
## [dp_train] iter 1965 / 2000 loss = 0.000415
## [dp_train] iter 1966 / 2000 loss = 0.000178
## [dp_train] iter 1967 / 2000 loss = 0.000391
## [dp_train] iter 1968 / 2000 loss = 0.001169
## [dp_train] iter 1969 / 2000 loss = 0.001647
## [dp_train] iter 1970 / 2000 loss = 0.000346
## [dp_train] iter 1971 / 2000 loss = 0.004571
## [dp_train] iter 1972 / 2000 loss = 0.000611
## [dp_train] iter 1973 / 2000 loss = 0.000128
## [dp_train] iter 1974 / 2000 loss = 0.005427
## [dp_train] iter 1975 / 2000 loss = 0.001593
## [dp_train] iter 1976 / 2000 loss = 0.012114
## [dp_train] iter 1977 / 2000 loss = 0.002109
## [dp_train] iter 1978 / 2000 loss = 0.001401
## [dp_train] iter 1979 / 2000 loss = 0.001917
## [dp_train] iter 1980 / 2000 loss = 0.002055
## [dp_train] iter 1981 / 2000 loss = 0.004204
## [dp_train] iter 1982 / 2000 loss = 0.004299
## [dp_train] iter 1983 / 2000 loss = 0.000089
## [dp_train] iter 1984 / 2000 loss = 0.000120
## [dp_train] iter 1985 / 2000 loss = 0.004485
## [dp_train] iter 1986 / 2000 loss = 0.002240
## [dp_train] iter 1987 / 2000 loss = 0.001953
## [dp_train] iter 1988 / 2000 loss = 0.000333
## [dp_train] iter 1989 / 2000 loss = 0.002088
## [dp_train] iter 1990 / 2000 loss = 0.001646
## [dp_train] iter 1991 / 2000 loss = 0.000730
## [dp_train] iter 1992 / 2000 loss = 0.183605
## [dp_train] iter 1993 / 2000 loss = 0.000635
## [dp_train] iter 1994 / 2000 loss = 0.001532
## [dp_train] iter 1995 / 2000 loss = 0.000556
## [dp_train] iter 1996 / 2000 loss = 0.005597
## [dp_train] iter 1997 / 2000 loss = 0.000266
## [dp_train] iter 1998 / 2000 loss = 0.000953
## [dp_train] iter 1999 / 2000 loss = 0.000540
## [dp_train] iter 2000 / 2000 loss = 0.001422
## Final loss: 1.114629 1.138763 1.035583 1.014455 1.003183 0.9951209 ... 0.0009525702 0.000539777 0.001421857
## (per-iteration loss history, one value per training iteration; truncated here for display)
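The per-iteration losses are noisy because each value comes from a single sample. If the returned object exposes the printed loss history, a running mean makes the trend easier to read. A minimal sketch in base R; the field name loss below is hypothetical, so check the dp_train return value for the actual accessor:
losses <- result$loss                        # hypothetical accessor for the printed loss history
k <- 50                                      # window for a simple running mean
trend <- stats::filter(losses, rep(1 / k, k), sides = 1)
plot(losses, type = "l", col = "grey70", xlab = "iteration", ylab = "loss")
lines(trend, col = "red", lwd = 2)           # smoothed trend line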
## Training on 1 GPU(s)
result_mg <- dp_train(
make_model = make_model,
data = dp_data,
loss_fn = function(out, tgt) ag_softmax_cross_entropy_loss(out, tgt),
forward_fn = function(model, s) model$forward(ag_tensor(s$x)),
target_fn = function(s) s$y,
n_gpu = n_gpu,
n_iter = 2000L,
lr = 1e-3,
max_norm = 5.0, # gradient clipping
verbose = FALSE
)
With n_gpu = 2 the effective batch size is 2, each step processes two samples, and training is roughly 2x faster (ignoring communication overhead).
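To make that concrete: the effective batch size is simply the replica count, so the number of steps needed to see every iris sample once shrinks proportionally. The snippet below is plain R arithmetic for illustration, not a ggmlR call:
samples <- ncol(x_cm)                        # 150 iris samples
for (g in c(1L, 2L, 4L)) {
  steps <- as.integer(ceiling(samples / g))  # steps to cover every sample once
  cat(sprintf("n_gpu = %d -> effective batch %d, %d steps per pass\n", g, g, steps))
}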
Pass max_norm to clip the global gradient norm (the L2 norm of all averaged gradients taken together) before
each optimizer step:
result <- dp_train(
make_model = make_model,
data = dp_data,
loss_fn = function(out, tgt) ag_softmax_cross_entropy_loss(out, tgt),
forward_fn = function(model, s) model$forward(ag_tensor(s$x)),
target_fn = function(s) s$y,
n_gpu = 1L,
n_iter = 2000L,
lr = 1e-3,
max_norm = 1.0 # clip to unit norm
)
## [dp_train] iter 1 / 2000 loss = 1.084859
## [dp_train] iter 10 / 2000 loss = 1.078990
## [dp_train] iter 20 / 2000 loss = 0.573856
## [dp_train] iter 30 / 2000 loss = 0.716332
## [dp_train] iter 40 / 2000 loss = 0.495728
## [dp_train] iter 50 / 2000 loss = 0.413461
## [dp_train] iter 60 / 2000 loss = 1.587604
## [dp_train] iter 70 / 2000 loss = 1.329936
## [dp_train] iter 80 / 2000 loss = 1.128712
## [dp_train] iter 90 / 2000 loss = 1.043059
## [dp_train] iter 100 / 2000 loss = 0.967249
## [dp_train] iter 110 / 2000 loss = 2.182918
## [dp_train] iter 120 / 2000 loss = 1.063052
## [dp_train] iter 130 / 2000 loss = 1.216791
## [dp_train] iter 140 / 2000 loss = 1.045920
## [dp_train] iter 150 / 2000 loss = 0.909009
## [dp_train] iter 160 / 2000 loss = 0.375079
## [dp_train] iter 170 / 2000 loss = 0.127171
## [dp_train] iter 180 / 2000 loss = 0.151755
## [dp_train] iter 190 / 2000 loss = 0.087562
## [dp_train] iter 200 / 2000 loss = 0.067301
## [dp_train] iter 210 / 2000 loss = 1.232388
## [dp_train] iter 220 / 2000 loss = 0.863086
## [dp_train] iter 230 / 2000 loss = 0.774115
## [dp_train] iter 240 / 2000 loss = 0.660062
## [dp_train] iter 250 / 2000 loss = 0.725449
## [dp_train] iter 260 / 2000 loss = 1.090140
## [dp_train] iter 270 / 2000 loss = 1.323715
## [dp_train] iter 280 / 2000 loss = 0.692716
## [dp_train] iter 290 / 2000 loss = 0.501307
## [dp_train] iter 300 / 2000 loss = 0.583796
## [dp_train] iter 310 / 2000 loss = 0.084518
## [dp_train] iter 320 / 2000 loss = 0.023469
## [dp_train] iter 330 / 2000 loss = 0.041449
## [dp_train] iter 340 / 2000 loss = 0.026766
## [dp_train] iter 350 / 2000 loss = 0.023658
## [dp_train] iter 360 / 2000 loss = 0.911897
## [dp_train] iter 370 / 2000 loss = 0.512977
## [dp_train] iter 380 / 2000 loss = 0.462576
## [dp_train] iter 390 / 2000 loss = 0.378539
## [dp_train] iter 400 / 2000 loss = 0.510559
## [dp_train] iter 410 / 2000 loss = 0.363325
## [dp_train] iter 420 / 2000 loss = 1.814408
## [dp_train] iter 430 / 2000 loss = 0.345297
## [dp_train] iter 440 / 2000 loss = 0.187492
## [dp_train] iter 450 / 2000 loss = 0.347652
## [dp_train] iter 460 / 2000 loss = 0.043737
## [dp_train] iter 470 / 2000 loss = 0.008988
## [dp_train] iter 480 / 2000 loss = 0.022030
## [dp_train] iter 490 / 2000 loss = 0.013282
## [dp_train] iter 500 / 2000 loss = 0.012483
## [dp_train] iter 510 / 2000 loss = 0.614618
## [dp_train] iter 520 / 2000 loss = 0.260837
## [dp_train] iter 530 / 2000 loss = 0.244040
## [dp_train] iter 540 / 2000 loss = 0.197708
## [dp_train] iter 550 / 2000 loss = 0.348170
## [dp_train] iter 560 / 2000 loss = 0.107035
## [dp_train] iter 570 / 2000 loss = 2.379108
## [dp_train] iter 580 / 2000 loss = 0.193235
## [dp_train] iter 590 / 2000 loss = 0.082315
## [dp_train] iter 600 / 2000 loss = 0.241091
## [dp_train] iter 610 / 2000 loss = 0.029768
## [dp_train] iter 620 / 2000 loss = 0.004216
## [dp_train] iter 630 / 2000 loss = 0.014175
## [dp_train] iter 640 / 2000 loss = 0.007590
## [dp_train] iter 650 / 2000 loss = 0.007710
## [dp_train] iter 660 / 2000 loss = 0.402700
## [dp_train] iter 670 / 2000 loss = 0.132259
## [dp_train] iter 680 / 2000 loss = 0.132457
## [dp_train] iter 690 / 2000 loss = 0.106314
## [dp_train] iter 700 / 2000 loss = 0.238757
## [dp_train] iter 710 / 2000 loss = 0.048612
## [dp_train] iter 720 / 2000 loss = 2.913434
## [dp_train] iter 730 / 2000 loss = 0.149448
## [dp_train] iter 740 / 2000 loss = 0.052282
## [dp_train] iter 750 / 2000 loss = 0.198717
## [dp_train] iter 760 / 2000 loss = 0.021020
## [dp_train] iter 770 / 2000 loss = 0.002177
## [dp_train] iter 780 / 2000 loss = 0.009669
## [dp_train] iter 790 / 2000 loss = 0.004679
## [dp_train] iter 800 / 2000 loss = 0.005054
## [dp_train] iter 810 / 2000 loss = 0.273796
## [dp_train] iter 820 / 2000 loss = 0.073170
## [dp_train] iter 830 / 2000 loss = 0.078655
## [dp_train] iter 840 / 2000 loss = 0.062381
## [dp_train] iter 850 / 2000 loss = 0.168417
## [dp_train] iter 860 / 2000 loss = 0.031244
## [dp_train] iter 870 / 2000 loss = 3.311355
## [dp_train] iter 880 / 2000 loss = 0.136812
## [dp_train] iter 890 / 2000 loss = 0.038249
## [dp_train] iter 900 / 2000 loss = 0.172097
## [dp_train] iter 910 / 2000 loss = 0.015105
## [dp_train] iter 920 / 2000 loss = 0.001222
## [dp_train] iter 930 / 2000 loss = 0.006810
## [dp_train] iter 940 / 2000 loss = 0.003072
## [dp_train] iter 950 / 2000 loss = 0.003472
## [dp_train] iter 960 / 2000 loss = 0.197246
## [dp_train] iter 970 / 2000 loss = 0.045042
## [dp_train] iter 980 / 2000 loss = 0.051198
## [dp_train] iter 990 / 2000 loss = 0.040722
## [dp_train] iter 1000 / 2000 loss = 0.123422
## [dp_train] iter 1010 / 2000 loss = 0.023878
## [dp_train] iter 1020 / 2000 loss = 3.580703
## [dp_train] iter 1030 / 2000 loss = 0.136361
## [dp_train] iter 1040 / 2000 loss = 0.030881
## [dp_train] iter 1050 / 2000 loss = 0.154193
## [dp_train] iter 1060 / 2000 loss = 0.011325
## [dp_train] iter 1070 / 2000 loss = 0.000743
## [dp_train] iter 1080 / 2000 loss = 0.005011
## [dp_train] iter 1090 / 2000 loss = 0.002187
## [dp_train] iter 1100 / 2000 loss = 0.002509
## [dp_train] iter 1110 / 2000 loss = 0.151642
## [dp_train] iter 1120 / 2000 loss = 0.030212
## [dp_train] iter 1130 / 2000 loss = 0.035706
## [dp_train] iter 1140 / 2000 loss = 0.028964
## [dp_train] iter 1150 / 2000 loss = 0.093290
## [dp_train] iter 1160 / 2000 loss = 0.019898
## [dp_train] iter 1170 / 2000 loss = 3.738376
## [dp_train] iter 1180 / 2000 loss = 0.138909
## [dp_train] iter 1190 / 2000 loss = 0.025482
## [dp_train] iter 1200 / 2000 loss = 0.138944
## [dp_train] iter 1210 / 2000 loss = 0.008645
## [dp_train] iter 1220 / 2000 loss = 0.000481
## [dp_train] iter 1230 / 2000 loss = 0.003774
## [dp_train] iter 1240 / 2000 loss = 0.001638
## [dp_train] iter 1250 / 2000 loss = 0.001889
## [dp_train] iter 1260 / 2000 loss = 0.123039
## [dp_train] iter 1270 / 2000 loss = 0.021788
## [dp_train] iter 1280 / 2000 loss = 0.026266
## [dp_train] iter 1290 / 2000 loss = 0.022265
## [dp_train] iter 1300 / 2000 loss = 0.071663
## [dp_train] iter 1310 / 2000 loss = 0.017445
## [dp_train] iter 1320 / 2000 loss = 3.763989
## [dp_train] iter 1330 / 2000 loss = 0.144014
## [dp_train] iter 1340 / 2000 loss = 0.021572
## [dp_train] iter 1350 / 2000 loss = 0.124212
## [dp_train] iter 1360 / 2000 loss = 0.006762
## [dp_train] iter 1370 / 2000 loss = 0.000331
## [dp_train] iter 1380 / 2000 loss = 0.002929
## [dp_train] iter 1390 / 2000 loss = 0.001295
## [dp_train] iter 1400 / 2000 loss = 0.001468
## [dp_train] iter 1410 / 2000 loss = 0.104455
## [dp_train] iter 1420 / 2000 loss = 0.016678
## [dp_train] iter 1430 / 2000 loss = 0.019991
## [dp_train] iter 1440 / 2000 loss = 0.018251
## [dp_train] iter 1450 / 2000 loss = 0.056216
## [dp_train] iter 1460 / 2000 loss = 0.015847
## [dp_train] iter 1470 / 2000 loss = 3.709096
## [dp_train] iter 1480 / 2000 loss = 0.151369
## [dp_train] iter 1490 / 2000 loss = 0.018353
## [dp_train] iter 1500 / 2000 loss = 0.110618
## [dp_train] iter 1510 / 2000 loss = 0.005392
## [dp_train] iter 1520 / 2000 loss = 0.000240
## [dp_train] iter 1530 / 2000 loss = 0.002312
## [dp_train] iter 1540 / 2000 loss = 0.001058
## [dp_train] iter 1550 / 2000 loss = 0.001177
## [dp_train] iter 1560 / 2000 loss = 0.092487
## [dp_train] iter 1570 / 2000 loss = 0.013313
## [dp_train] iter 1580 / 2000 loss = 0.015647
## [dp_train] iter 1590 / 2000 loss = 0.015667
## [dp_train] iter 1600 / 2000 loss = 0.044582
## [dp_train] iter 1610 / 2000 loss = 0.015124
## [dp_train] iter 1620 / 2000 loss = 3.589678
## [dp_train] iter 1630 / 2000 loss = 0.159220
## [dp_train] iter 1640 / 2000 loss = 0.015467
## [dp_train] iter 1650 / 2000 loss = 0.101143
## [dp_train] iter 1660 / 2000 loss = 0.004342
## [dp_train] iter 1670 / 2000 loss = 0.000184
## [dp_train] iter 1680 / 2000 loss = 0.001856
## [dp_train] iter 1690 / 2000 loss = 0.000895
## [dp_train] iter 1700 / 2000 loss = 0.000957
## [dp_train] iter 1710 / 2000 loss = 0.084262
## [dp_train] iter 1720 / 2000 loss = 0.010972
## [dp_train] iter 1730 / 2000 loss = 0.012489
## [dp_train] iter 1740 / 2000 loss = 0.013863
## [dp_train] iter 1750 / 2000 loss = 0.035681
## [dp_train] iter 1760 / 2000 loss = 0.015042
## [dp_train] iter 1770 / 2000 loss = 3.414433
## [dp_train] iter 1780 / 2000 loss = 0.165160
## [dp_train] iter 1790 / 2000 loss = 0.013085
## [dp_train] iter 1800 / 2000 loss = 0.094132
## [dp_train] iter 1810 / 2000 loss = 0.003545
## [dp_train] iter 1820 / 2000 loss = 0.000149
## [dp_train] iter 1830 / 2000 loss = 0.001515
## [dp_train] iter 1840 / 2000 loss = 0.000778
## [dp_train] iter 1850 / 2000 loss = 0.000803
## [dp_train] iter 1860 / 2000 loss = 0.078147
## [dp_train] iter 1870 / 2000 loss = 0.009184
## [dp_train] iter 1880 / 2000 loss = 0.010088
## [dp_train] iter 1890 / 2000 loss = 0.012412
## [dp_train] iter 1900 / 2000 loss = 0.028636
## [dp_train] iter 1910 / 2000 loss = 0.015328
## [dp_train] iter 1920 / 2000 loss = 3.221948
## [dp_train] iter 1930 / 2000 loss = 0.175700
## [dp_train] iter 1940 / 2000 loss = 0.010949
## [dp_train] iter 1950 / 2000 loss = 0.086725
## [dp_train] iter 1960 / 2000 loss = 0.002894
## [dp_train] iter 1970 / 2000 loss = 0.000126
## [dp_train] iter 1980 / 2000 loss = 0.001233
## [dp_train] iter 1990 / 2000 loss = 0.000685
## [dp_train] iter 2000 / 2000 loss = 0.000671
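For reference, clipping the global gradient norm means rescaling all averaged gradients together so that their combined L2 norm never exceeds max_norm: g <- g * min(1, max_norm / ||g||). The base-R sketch below illustrates that rule on a plain list of numeric gradients; it is not ggmlR's internal implementation.
clip_global_norm <- function(grads, max_norm) {
  total <- sqrt(sum(vapply(grads, function(g) sum(g^2), numeric(1))))
  scale <- min(1, max_norm / total)
  lapply(grads, function(g) g * scale)       # every gradient scaled by the same factor
}
g <- list(matrix(rnorm(8), 2), rnorm(3))
clipped <- clip_global_norm(g, max_norm = 1.0)
sqrt(sum(vapply(clipped, function(x) sum(x^2), numeric(1))))  # <= 1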
ag_dataloader — batched training loop
For standard single-process batched training,
ag_dataloader is simpler than dp_train:
x_tr <- x_cm[, 1:120]; y_tr <- y_oh[, 1:120]   # first 120 samples (columns) for training
dl <- ag_dataloader(x_tr, y_tr, batch_size = 32L, shuffle = TRUE)
model2 <- make_model()
params2 <- model2$parameters()
opt2 <- optimizer_adam(params2, lr = 1e-3)
ag_train(model2)                               # switch the model to training mode
for (ep in seq_len(100L)) {
  for (batch in dl$epoch()) {                  # iterate over shuffled mini-batches
    with_grad_tape({
      loss <- ag_softmax_cross_entropy_loss(
        model2$forward(batch$x), batch$y$data)
    })
    grads <- backward(loss)                    # gradients for all tracked parameters
    opt2$step(grads); opt2$zero_grad()
  }
}
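After training, the remaining 30 columns can serve as a quick held-out check. The snippet below is an illustrative sketch, not part of the original example: it assumes the tensor returned by model2$forward() exposes its numeric values through a $data field, mirroring the batch objects above; substitute the package's actual accessor if it differs.
x_te <- x_cm[, 121:150]; y_te <- y_oh[, 121:150]
logits <- model2$forward(ag_tensor(x_te))   # [3, 30] class scores
pred <- max.col(t(logits$data))             # assumed $data accessor; predicted class per column
truth <- max.col(t(y_te))                   # one-hot targets -> class index
mean(pred == truth)                         # held-out accuracy
Keep in mind that iris rows are ordered by species, so this 120/30 split leaves the held-out set dominated by a single class; shuffle the sample indices before splitting for a more representative check.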