昆虫数量_花朵特征_数据可视化

Clojure

下载此实例

开发语言：Others
实例大小：5.42KB
下载次数：0
浏览次数：17
发布时间：2025-01-04
实例类别：Clojure
发布人：yunfeipanpanhaolizi
文件格式：.R
所需积分：1

相关标签：数据可视化可视化花朵特征数据

网友评论举报投诉收藏该页

下载此实例

实例介绍

[下载地址]

【实例简介】
探讨了昆虫数量与花朵特征之间的关系，并通过数据可视化的方式展示了研究结果。
【实例截图】
from clipboard

【核心代码】
# Run the project's read-data script; the objects dd_insect and dd_flwr used
# below are presumably created by it (not visible here -- confirm).
source("script/00-readData_正式.R", encoding = "utf8")

head(dd_insect)
head(dd_flwr)

alpha <- 0.1 # transparency level; change alpha to adjust it

#------- Insects first: exploratory plots ---------------------

head(dd_insect, 2)
# showtext.auto() is the deprecated spelling; showtext_auto() is the current
# name of the same switch.
showtext::showtext_auto()

colnames(dd_insect) # names() == colnames()
x11()

# ggplot2 layers must be chained with `+`; without it ggplot() draws an empty
# canvas and the geom is evaluated as a separate, unused object.
# Column `14` against area, jittered horizontally only, coloured by 次数.
ggplot() +
  geom_point(
    data = dd_insect,
    aes(x = area, y = `14`, color = 次数),
    position = position_jitter(height = 0, width = 0.3),
    size = 0.4
  )

# Boxplot of column 普通型 per area, filled by 次数.
ggplot() +
  geom_boxplot(
    data = dd_insect,
    aes(x = area, y = 普通型, fill = 次数)
  )

#------- pivot_longer(): wide -> long data; pivot_wider(): long -> wide -------
colnames(dd_insect) |> dput()

# Columns of dd_insect that hold per-species counts (pasted from dput() above).
SpeciesName <- c(
  "黑白型", "普通型",
  "大黄", "小菱", "黄粉", "无毛喑哑", "小亮尖", "姬蜂B和C",
  "姬蜂A", "短柄泥蜂", "22", "62", "9", "J", "21（联地蜂）",
  "70", "D(德国黄胡蜂）", "M（大写）", "25", "90", "14",
  "888", "777", "666", "C（大写）", "8", "4", "555", "444",
  "999", "333", "B（大写）", "19", "47", "3", "28", "48", "23",
  "r(小写）", "F(大写）", "f(小写）", "2", "bb", "95"
)

# Stack the species columns into two columns: Species (name) and num (value).
dd_insect_long <-
  dd_insect |>
  pivot_longer(
    # An external character vector must be wrapped in all_of(); a bare
    # vector in a tidyselect context is deprecated and ambiguous.
    cols = all_of(SpeciesName),
    names_to = "Species",
    values_to = "num"
  )

# From long data back to wide data (printed only, not stored).
dd_insect_long |>
  pivot_wider(names_from = "Species", values_from = "num")

# One panel per species, each with its own y scale.
ggplot() +
  geom_point(
    data = dd_insect_long,
    aes(x = area, y = num, color = 次数),
    position = position_jitter(height = 0, width = 0.3),
    size = 0.1
  ) +
  facet_wrap(. ~ Species, scales = "free_y")

# Hand-picked subset of species to look at more closely.
dominatorSpecies <- c(
  "普通型", "大黄", "小亮尖", "M（大写）", "14", "28", "555", "95", "bb",
  "D(德国黄胡蜂）", "短柄泥蜂", "黑白型", "姬蜂A", "姬蜂B和C", "无毛喑哑"
)

dd_dominatorWB <- dd_insect_long |> filter(Species %in% dominatorSpecies)

# log(num + 1) keeps zero values finite on the log scale.
ggplot() +
  geom_point(
    data = dd_dominatorWB,
    aes(x = area, y = log(num + 1), color = 次数),
    position = position_jitter(height = 0, width = 0.3),
    size = 0.1
  ) +
  geom_boxplot(
    data = dd_dominatorWB,
    aes(x = area, y = log(num + 1), color = 次数)
  ) +
  facet_wrap(. ~ Species)

### --------- Learning: row-wise totals with mutate() and transmute() -----
head(dd_insect, 2)
colnames(dd_insect) |> dput()

# Row-wise total over all species columns, printed only. The column names
# must be joined with `+`; juxtaposed names are a syntax error.
dd_insect |>
  mutate(
    sum = `黑白型` + `普通型` +
      `大黄` + `小菱` + `黄粉` + `无毛喑哑` + `小亮尖` + `姬蜂B和C` +
      `姬蜂A` + `短柄泥蜂` + `22` + `62` + `9` + `J` + `21（联地蜂）` +
      `70` + `D(德国黄胡蜂）` + `M（大写）` + `25` + `90` + `14` +
      `888` + `777` + `666` + `C（大写）` + `8` + `4` + `555` + `444` +
      `999` + `333` + `B（大写）` + `19` + `47` + `3` + `28` + `48` + `23` +
      `r(小写）` + `F(大写）` + `f(小写）` + `2` + `bb` + `95`
  )

# mutate(): keeps every existing column and appends `sum`; the result is
# stored for the plots and models further down.
dd_insect_Xsum <-
  dd_insect |>
  mutate(
    sum = `黑白型` + `普通型` + # transmute() works too
      `大黄` + `小菱` + `黄粉` + `无毛喑哑` + `小亮尖` + `姬蜂B和C` +
      `姬蜂A` + `短柄泥蜂` + `22` + `62` + `9` + `J` + `21（联地蜂）` +
      `70` + `D(德国黄胡蜂）` + `M（大写）` + `25` + `90` + `14` +
      `888` + `777` + `666` + `C（大写）` + `8` + `4` + `555` + `444` +
      `999` + `333` + `B（大写）` + `19` + `47` + `3` + `28` + `48` + `23` +
      `r(小写）` + `F(大写）` + `f(小写）` + `2` + `bb` + `95`
  )

# transmute() works too: it keeps only the columns listed plus `sum`
# (printed only, not stored).
dd_insect |>
  transmute(
    area, `站点`, `次数`, `日期`, `颜色`,
    sum = `黑白型` + `普通型` +
      `大黄` + `小菱` + `黄粉` + `无毛喑哑` + `小亮尖` + `姬蜂B和C` +
      `姬蜂A` + `短柄泥蜂` + `22` + `62` + `9` + `J` + `21（联地蜂）` +
      `70` + `D(德国黄胡蜂）` + `M（大写）` + `25` + `90` + `14` +
      `888` + `777` + `666` + `C（大写）` + `8` + `4` + `555` + `444` +
      `999` + `333` + `B（大写）` + `19` + `47` + `3` + `28` + `48` + `23` +
      `r(小写）` + `F(大写）` + `f(小写）` + `2` + `bb` + `95`
  )

head(dd_insect_Xsum)

# showtext.end() is the deprecated spelling of showtext_end().
# NOTE(review): if the intent is to switch off the automatic showtext mode,
# showtext_auto(FALSE) is the matching call -- confirm.
showtext::showtext_end()

# Row totals per area, split by 次数: boxplots with the raw points overlaid.
# The layers are chained with `+` so that ggsave() below saves the full plot
# rather than an empty canvas.
ggplot() +
  geom_boxplot(
    data = dd_insect_Xsum,
    aes(x = area, y = sum, color = 次数),
    outlier.color = NA, # hide boxplot outliers; the raw points are drawn below
    position = position_dodge(width = 0.9)
  ) +
  geom_point(
    data = dd_insect_Xsum,
    aes(x = area, y = sum, color = 次数),
    # dodge by the same width as the boxes, jitter horizontally only
    position = position_jitterdodge(
      dodge.width = 0.9,
      jitter.height = 0,
      jitter.width = 0.1
    ),
    shape = 1,
    # position = position_dodge(width = 0.9),
    # position = position_jitter(height = 0, width = 0.3),
    size = 0.6
  ) +
  labs(y = "wildbee sum", x = "Area", color = "Times")

# ggsave() writes the most recently displayed plot; height = width * 0.618.
ggsave("img/wildbeeSum.jpg", units = "cm", width = 15, height = 15 * 0.618)

# Same plot without splitting by 次数.
ggplot() +
  geom_boxplot(
    data = dd_insect_Xsum,
    aes(x = area, y = sum),
    outlier.color = NA,
    position = position_dodge(width = 0.9)
  ) +
  geom_point(
    data = dd_insect_Xsum,
    aes(x = area, y = sum),
    shape = 1,
    position = position_jitter(height = 0, width = 0.2),
    # position = position_dodge(width = 0.9),
    # position = position_jitter(height = 0, width = 0.3),
    size = 0.6
  ) +
  labs(y = "wildbee sum", x = "Area", color = "Times")

ggsave("img/wildbeeSumNoGroup.jpg", units = "cm", width = 15, height = 15 * 0.618)

#-------- Grouped totals -----------
##------- Learning: group_by() |> summarise() ----------
# One row per area x 次数 x 站点 combination, holding the total of `sum`.
dd_insect_groupSum <-
  dd_insect_Xsum |>
  group_by(area, 次数, 站点) |>
  # other summaries: mean(), sum(), sd(), se = sd() / sqrt(n)
  # .groups = "drop" returns an ungrouped result (replaces the trailing
  # ungroup() and silences the grouping message).
  summarise(sum.total = sum(sum), .groups = "drop")

# Grouped totals per area, not split by 次数.
ggplot() +
  geom_boxplot(
    data = dd_insect_groupSum,
    aes(x = area, y = sum.total),
    outlier.color = NA, # hide boxplot outliers; the raw points are drawn below
    position = position_dodge(width = 0.9)
  ) +
  geom_point(
    data = dd_insect_groupSum,
    aes(x = area, y = sum.total),
    shape = 1,
    position = position_jitter(height = 0, width = 0.2),
    # position = position_dodge(width = 0.9),
    # position = position_jitter(height = 0, width = 0.3),
    size = 0.6
  ) +
  labs(y = "wildbee sum", x = "Area", color = "Times")

# Grouped totals per area, split by 次数.
ggplot() +
  geom_boxplot(
    data = dd_insect_groupSum,
    aes(x = area, y = sum.total, color = 次数),
    outlier.color = NA,
    position = position_dodge(width = 0.9)
  ) +
  geom_point(
    data = dd_insect_groupSum,
    aes(x = area, y = sum.total, color = 次数),
    shape = 1,
    position = position_jitterdodge(
      dodge.width = 0.9,
      jitter.height = 0,
      jitter.width = 0.1
    ),
    # position = position_jitter(height = 0, width = 0.2),
    # position = position_dodge(width = 0.9),
    # position = position_jitter(height = 0, width = 0.3),
    size = 1
  ) +
  labs(y = "wildbee sum", x = "Area", color = "Times")

# Pass the device FUNCTION (cairo_pdf), not a call: cairo_pdf() would open a
# stray graphics device immediately and hand its return value to ggsave().
ggsave(
  "img/wildbeeSum_widh_Group.pdf",
  units = "cm", width = 15, height = 15 * 0.618,
  device = cairo_pdf
)

###------------ Building models -------------------
# Purpose of a model: obtain p-values so that inferences can be drawn.
# Two broad families of models:
# Linear models (parametric tests): give p-values, plus a variable's mean
#   effect and the intercept (a*x + b).
#   Assumptions: normally distributed residuals, homogeneous residual variance.
#   Suitable for: grouped predictors as well as continuous ones,
#   y = group1 + group2 + x1 + x2 + x1:group1
# Non-parametric tests: give p-values, need no such assumptions, and only
#   suit grouping variables.
##  Suitable for: grouped variables (dd$area), not continuous data
##  (e.g. pedicel length), y = group1

####----------- 1.0 Non-parametric test (rank sum test) -------------
dd_insect_groupSum

# One subset per value of 次数.
dd_temp1 <-
  dd_insect_groupSum |> filter(次数 == 1)

dd_temp2 <-
  dd_insect_groupSum |> filter(次数 == 2)

dd_temp3 <-
  dd_insect_groupSum |> filter(次数 == 3)

# Kruskal-Wallis test of sum.total across areas, separately for each subset.
kruskal.test(sum.total ~ area, data = dd_temp3)
kruskal.test(sum.total ~ area, data = dd_temp2)
kruskal.test(sum.total ~ area, data = dd_temp1)

# Same test with rows 6 and 7 of dd_temp1 removed. The row ID is created here
# because dd_temp1 has no ID column at this point in the script (it is only
# added further down), so filtering on ID directly would fail.
kruskal.test(
  sum.total ~ area,
  data = dd_temp1 |>
    mutate(ID = row_number()) |>
    filter(!ID %in% c(6, 7))
)

####----------- 1.1 Linear parametric tests (Parameter Estimate) -------------
# GLMM, GLM and anova (analysis of variance) are all based on lm.
# 1. Conditions: y (or the residuals) needs a large amount of data, must be
#    normal with homogeneous variance; predictors x may be anything, i.e.
#    discrete or continuous.
#    lm:
#    anova(): when the predictors of lm are groups, lm is equivalent to anova();
#             with exactly 2 groups, lm = anova = t-test (2 groups only);
#             with more than 2 groups, lm = anova != t-test.
#    anova: aov(y = group1)
#    lm   : lm(y = x3 + beta1_x2 + group1 + x3)
# 2. Conditions: y (or the residuals) only needs homogeneous variance;
#    predictors x may be anything, i.e. discrete or continuous, but a
#    distribution family must be specified.
#    GLM: families: gaussian is the normal distribution
#         poisson, binomial (0, 1), negative binomial: y must be discrete,
#         positive integers
#         gamma: y > 0, e.g. mass
#         beta_binomial: y %in% c(0, 1)
# 3. Conditions: same as 2, plus a random effect (1|site): the random
#    intercept must be categorical, i.e. each site's insect count is believed
#    to differ for various reasons.
#    GLMM: (1|site)
dd_temp1 <-
  dd_insect_groupSum |> filter(次数 == 1)

x11()
# Row ID used to label points and to pick rows for removal.
# seq_len() stays empty for a zero-row table, unlike 1:nrow().
dd_temp1$ID <- seq_len(nrow(dd_temp1))

# Boxplot per area with every observation labelled by its ID.
ggplot() +
  geom_boxplot(
    data = dd_temp1,
    aes(x = area, y = sum.total, color = 次数),
    # outlier.color = "NA",
    position = position_dodge(width = 0.9)
  ) +
  geom_text(
    data = dd_temp1,
    aes(x = area, y = sum.total, label = ID),
    nudge_x = 0.2
  )

# Same plot after dropping the rows with ID 6 and 7.
dd_temp1_del <- dd_temp1 |> filter(!ID %in% c(6, 7))
ggplot() +
  geom_boxplot(
    data = dd_temp1_del,
    aes(x = area, y = sum.total, color = 次数),
    # outlier.color = "NA",
    position = position_dodge(width = 0.9)
  ) +
  geom_text(
    data = dd_temp1_del,
    aes(x = area, y = sum.total, label = ID),
    nudge_x = 0.2
  )

# TODO: ggsave() needs a file name and fails when called with no arguments.
# Supply a path to save the plot above, e.g.
# ggsave("img/<name>.jpg", units = "cm", width = 15, height = 15 * 0.618)

head(dd_temp1)
tail(dd_temp1)

### lm:

# glmmTMB() is called below, so the package is attached before its first use.
library(glmmTMB)

# Density of the response, as a quick look at its distribution.
car::densityPlot(dd_temp1_del$sum.total)

# lm
mod1 <- lm(sum.total ~ area, data = dd_temp1_del)

# glm: poisson, nbinom1, nbinom2
mod2 <- glmmTMB(sum.total ~ area,
  data = dd_temp1_del,
  family = poisson()
)
mod3 <- glmmTMB(sum.total ~ area,
  data = dd_temp1_del,
  family = nbinom1()
)
mod4 <- glmmTMB(sum.total ~ area,
  data = dd_temp1_del,
  family = nbinom2()
)

# glmm: LMM, poisson, nbinom1, nbinom2 -- each with a random intercept per
# 站点. The fixed and random terms must be joined with `+` in the formula.
mod5 <- glmmTMB(sum.total ~ area + (1 | 站点),
  data = dd_temp1_del
)
mod6 <- glmmTMB(sum.total ~ area + (1 | 站点),
  data = dd_temp1_del,
  family = poisson()
)
mod7 <- glmmTMB(sum.total ~ area + (1 | 站点),
  data = dd_temp1_del,
  family = nbinom1()
)
mod8 <- glmmTMB(sum.total ~ area + (1 | 站点),
  data = dd_temp1_del,
  family = nbinom2()
)

# NOTE(review): the formula interface of t.test() needs a grouping factor
# with exactly two levels -- confirm that area has two levels.
t.test(sum.total ~ area, data = dd_temp1_del)

# Model inspected below; change this one line to look at another model.
mod <- mod3

summary(mod)
simulateResiduals(mod) |> plot()

# Compare all candidate models by AIC.
AIC(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8)

### Diagnostic plots for judging how good a model is.
# NOTE(review): the original referred to `model2` / `model3`, which are not
# defined anywhere in the visible script; the fitted models are named
# mod1 .. mod8. The mapping below is an assumption -- confirm.
x11()
check_model(mod2)
simulateResiduals(mod3) |> plot()

# 2 x 2 panel of base-R diagnostic plots, drawn for the lm fit (mod1) on the
# assumption that this is what the 2 x 2 layout was meant for -- confirm.
# The previous par() settings are restored afterwards.
old_par <- par(mfrow = c(2, 2))
plot(mod1)
par(old_par)

library(glmmTMB)

标签： 数据可视化可视化花朵特征数据

实例下载地址