Module 2: Exercise Results
PCA
# Principal component analysis of the crab measurements; the per-sample
# scores used for plotting below are stored in c_pcs$x.
c_pcs <- prcomp(crabs_meas)
Plot PC projections (embeddings).
# Scatterplot matrix of the PC scores, first coloured by species ...
pairs(
  c_pcs[["x"]],
  col = c("orchid", "forestgreen")[factor(crabs[["sp"]])]
)
# ... then the same matrix coloured by sex.
pairs(
  c_pcs[["x"]],
  col = c("orchid", "forestgreen")[factor(crabs[["sex"]])]
)
tSNE:
library(tsne)
# t-SNE embedding of the crab measurements at perplexity 10.
c_tsne10 <- tsne(crabs_meas, perplexity = 10)
## sigma summary: Min. : 0.295392306171995 |1st Qu. : 0.424864940106807 |Median : 0.475900590252246 |Mean : 0.477263744443299 |3rd Qu. : 0.522810659014478 |Max. : 0.672971536327323 |
## Epoch: Iteration #100 error is: 14.9742952907292
## Epoch: Iteration #200 error is: 0.449199180889804
## Epoch: Iteration #300 error is: 0.422110962193814
## Epoch: Iteration #400 error is: 0.41223625966381
## Epoch: Iteration #500 error is: 0.40748658836236
## Epoch: Iteration #600 error is: 0.404664189721469
## Epoch: Iteration #700 error is: 0.40307600123476
## Epoch: Iteration #800 error is: 0.40196199567539
## Epoch: Iteration #900 error is: 0.400979788137516
## Epoch: Iteration #1000 error is: 0.400216130969452
# t-SNE embedding of the crab measurements at perplexity 20.
c_tsne20 <- tsne(crabs_meas, perplexity = 20)
## sigma summary: Min. : 0.42069998064187 |1st Qu. : 0.505494820242659 |Median : 0.550282641638609 |Mean : 0.553782538032253 |3rd Qu. : 0.597446288884567 |Max. : 0.737568418500652 |
## Epoch: Iteration #100 error is: 14.2353799777606
## Epoch: Iteration #200 error is: 0.381533068263884
## Epoch: Iteration #300 error is: 0.366558073482219
## Epoch: Iteration #400 error is: 0.355973545801828
## Epoch: Iteration #500 error is: 0.355058032759469
## Epoch: Iteration #600 error is: 0.35459440760735
## Epoch: Iteration #700 error is: 0.35429209109582
## Epoch: Iteration #800 error is: 0.354090074160959
## Epoch: Iteration #900 error is: 0.353933226719324
## Epoch: Iteration #1000 error is: 0.353803462746889
# t-SNE embedding of the crab measurements at perplexity 50.
c_tsne50 <- tsne(crabs_meas, perplexity = 50)
## sigma summary: Min. : 0.539839363698465 |1st Qu. : 0.634067694694373 |Median : 0.675230651916411 |Mean : 0.676426601512199 |3rd Qu. : 0.712708887622463 |Max. : 0.85041386579969 |
## Epoch: Iteration #100 error is: 13.3621854617619
## Epoch: Iteration #200 error is: 0.395961248729063
## Epoch: Iteration #300 error is: 0.331323038866023
## Epoch: Iteration #400 error is: 0.330681320407713
## Epoch: Iteration #500 error is: 0.330681320259094
## Epoch: Iteration #600 error is: 0.330681320259094
## Epoch: Iteration #700 error is: 0.330681320259094
## Epoch: Iteration #800 error is: 0.330681320259094
## Epoch: Iteration #900 error is: 0.330681320259094
## Epoch: Iteration #1000 error is: 0.330681320259094
# t-SNE embedding of the crab measurements at perplexity 100.
c_tsne100 <- tsne(crabs_meas, perplexity = 100)
## sigma summary: Min. : 0.689338665294285 |1st Qu. : 0.801156853023062 |Median : 0.838030059692607 |Mean : 0.83585263946599 |3rd Qu. : 0.869043547272454 |Max. : 1.00462478171883 |
## Epoch: Iteration #100 error is: 11.1490750106089
## Epoch: Iteration #200 error is: 0.208862858502072
## Epoch: Iteration #300 error is: 0.20832428398813
## Epoch: Iteration #400 error is: 0.208324207704578
## Epoch: Iteration #500 error is: 0.208324207704482
## Epoch: Iteration #600 error is: 0.208324207704482
## Epoch: Iteration #700 error is: 0.208324207704482
## Epoch: Iteration #800 error is: 0.208324207704482
## Epoch: Iteration #900 error is: 0.208324207704482
## Epoch: Iteration #1000 error is: 0.208324207704482
# One colour per sample, chosen by the level of crabs$sex.
# (Straight quotes are required: typographic quotes are not valid R syntax.)
sex_cols <- c("orchid", "forestgreen")[factor(crabs$sex)]
Color-code the tSNE plots by species, trying various perplexity levels:
# One colour per sample, chosen by the level of crabs$sp.
species_cols <- c("orchid", "forestgreen")[factor(crabs$sp)]

# 2 x 2 grid: one panel per perplexity setting, each showing the first
# t-SNE dimension against the second.
par(mfrow = c(2, 2))
plot(c_tsne10[, 1],
     c_tsne10[, 2],
     main = "Perplexity = 10",
     col = species_cols)
plot(c_tsne20[, 1],
     c_tsne20[, 2],
     main = "Perplexity = 20",
     col = species_cols)
plot(c_tsne50[, 1],
     c_tsne50[, 2],
     main = "Perplexity = 50",
     col = species_cols)
plot(c_tsne100[, 1],
     c_tsne100[, 2],
     main = "Perplexity = 100",
     col = species_cols)
Now do the same, but colour-code for sex:
# One colour per sample, chosen by the level of crabs$sex.
sex_cols <- c("orchid", "forestgreen")[factor(crabs$sex)]

# 2 x 2 grid: one panel per perplexity setting, each showing the first
# t-SNE dimension against the second.
par(mfrow = c(2, 2))
plot(c_tsne10[, 1],
     c_tsne10[, 2],
     main = "Perplexity = 10",
     col = sex_cols)
plot(c_tsne20[, 1],
     c_tsne20[, 2],
     main = "Perplexity = 20",
     col = sex_cols)
plot(c_tsne50[, 1],
     c_tsne50[, 2],
     main = "Perplexity = 50",
     col = sex_cols)
plot(c_tsne100[, 1],
     c_tsne100[, 2],
     main = "Perplexity = 100",
     col = sex_cols)
Run UMAP
library(umap)
# UMAP embedding of the crab measurements with the package defaults.
c_umap <- umap(crabs_meas)
# Inspect the returned object: layout, data, knn and config components.
str(c_umap)
## List of 4
## $ layout: num [1:200, 1:2] 1.86 1.7 1.74 2.06 2.12 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:200] "1" "2" "3" "4" ...
## .. ..$ : NULL
## $ data : num [1:200, 1:5] 8.1 8.8 9.2 9.6 9.8 10.8 11.1 11.6 11.8 11.8 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:200] "1" "2" "3" "4" ...
## .. ..$ : chr [1:5] "FL" "RW" "CL" "CW" ...
## $ knn :List of 2
## ..$ indexes : int [1:200, 1:15] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:200] "1" "2" "3" "4" ...
## .. .. ..$ : NULL
## ..$ distances: num [1:200, 1:15] 0 0 0 0 0 0 0 0 0 0 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:200] "1" "2" "3" "4" ...
## .. .. ..$ : NULL
## ..- attr(*, "class")= chr "umap.knn"
## $ config:List of 24
## ..$ n_neighbors : int 15
## ..$ n_components : int 2
## ..$ metric : chr "euclidean"
## ..$ n_epochs : int 200
## ..$ input : chr "data"
## ..$ init : chr "spectral"
## ..$ min_dist : num 0.1
## ..$ set_op_mix_ratio : num 1
## ..$ local_connectivity : num 1
## ..$ bandwidth : num 1
## ..$ alpha : num 1
## ..$ gamma : num 1
## ..$ negative_sample_rate: int 5
## ..$ a : num 1.58
## ..$ b : num 0.895
## ..$ spread : num 1
## ..$ random_state : int 725881182
## ..$ transform_state : int NA
## ..$ knn : logi NA
## ..$ knn_repeats : num 1
## ..$ verbose : logi FALSE
## ..$ umap_learn_args : logi NA
## ..$ method : chr "naive"
## ..$ metric.function :function (m, origin, targets)
## ..- attr(*, "class")= chr "umap.config"
## - attr(*, "class")= chr "umap"
# Side-by-side panels of the two UMAP layout columns, coloured by species
# (left) and by sex (right).
par(mfrow = c(1, 2))
plot(c_umap$layout[, 1],
     c_umap$layout[, 2],
     col = species_cols, pch = 19,
     main = "Colored by species")
plot(c_umap$layout[, 1],
     c_umap$layout[, 2],
     col = sex_cols, pch = 19,
     main = "Colored by sex")