# Character vector
name <- c("張三", "李四")
# Numeric vector
num <- c(1, 2, 3.4, 5)
# Logical vector. Spell out TRUE/FALSE: T and F are ordinary variables that
# can be reassigned, whereas TRUE and FALSE are reserved words.
b <- c(FALSE, TRUE, FALSE, TRUE)
# An atomic vector cannot mix types: the logicals are coerced to numeric
a <- c(1, 2, TRUE, FALSE) # 1 2 1 0
# There are no sub-vectors: nested c() calls are flattened
d <- c(1, c(2, 3), c(4, 5)) # 1 2 3 4 5
# Create fixed-length vectors filled with the type's default value
x1 <- vector("numeric", 3) # 0 0 0
x2 <- numeric(3) # 0 0 0
x3 <- character(3) # "" "" ""
x4 <- logical(3) # FALSE FALSE FALSE
x5 <- vector(length = 3) # FALSE FALSE FALSE (default mode is "logical")
# Create vectors with seq()
seq(from = 1, to = 10, by = 2) # 1 3 5 7 9
seq(from = 10, to = 1, by = -2) # 10 8 6 4 2
# Use the full argument name length.out rather than the partial match `len`
seq(from = 1, to = 10, length.out = 3) # 1.0 5.5 10.0
# Special case: the colon operator always steps by 1
1:5 # 1 2 3 4 5
pi:1 # 3.141593 2.141593 1.141593
# `:` binds tighter than binary minus, so this is (1:5) - 1
1:5 - 1 # 0 1 2 3 4
1:(5 - 1) # 1 2 3 4
# Create vectors with sample()
# The two calls below run before the seed is set, so their results vary
sample(5) # a random permutation of 1:5, e.g. 2 4 5 1 3
sample(c("a", "b", "c", "d")) # e.g. "d" "c" "b" "a"
set.seed(2020) # fix the random seed so the draws below are reproducible
# Outputs below are as recorded in the original notes; they may differ
# between R versions that use a different sampling algorithm.
sample(5) # 5 2 4 3 1
sample(1:5, 3) # 1 5 2 (draw three values at random)
# Sampling with replacement
re_sample <- sample(1:100, 100, replace = TRUE)
unique_re_sample <- unique(re_sample)
# Number of distinct values drawn (at most 100, usually fewer)
length(unique_re_sample)
# Accessing subsets of a vector
# Positive integer indices
score <- c(95, 96, 85, 98, 88, 90)
score[c(3, 5)] # 85 88
score[-c(3, 5)] # 95 96 98 90 (everything except positions 3 and 5)
score[c(3, 5)] - 90 # -5 -2
# Assigning through an index modifies only the selected elements
score[c(3, 5)] <- score[c(3, 5)] + 6
score # 95 96 91 98 94 90
# Special uses of the subscript
# Empty subscript: every element is overwritten, the length is kept
score[] <- mean(score)
score # 94 94 94 94 94 94
# No subscript: the whole vector is replaced by a single value
score <- mean(score)
score # 94
# Indices may repeat and may appear in any order
name <- c("張三", "李四", "王五")
name[c(1, 1, 3, 2)] # "張三" "張三" "王五" "李四"
# Negative integer indices
score <- c(95, 96, 85, 98, 88, 90)
score[-c(3, 5)] # 95 96 98 90
idx <- which(score < 90) # 3 5 (positions of the scores below 90)
# Caution: if idx were empty, score[-idx] would select nothing at all;
# score[!(score < 90)] is the safer general form.
score[-idx] # 95 96 98 90
# Logical indices
score <- c(95, 96, 85, 98, 88, 90)
name <- c("張三", "李四", "王五", "劉備", "曹操", "張飛")
score < 90 # FALSE FALSE TRUE FALSE TRUE FALSE
score[score < 90] # 85 88
# A mask built from one vector can index a parallel vector of equal length
name[score < 90] # "王五" "曹操" (names of those scoring below 90)
# Access subsets by element name
score <- c(95, 96, 85, 98, 88, 90)
xm <- c("張三", "李四", "王五", "劉備", "曹操", "張飛")
# Attach one name to each element, in order
names(score) <- xm
score
# 張三 李四 王五 劉備 曹操 張飛
# 95 96 85 98 88 90
# A character index selects elements by name; the names are kept
score[c("劉備", "張飛")]
# 劉備 張飛
# 98 90
# Basic vector operations
# Sorting a vector
v1 <- c(a = 5, b = 10, c = 12, d = 6)
sort(v1)
# a d b c
# 5 6 10 12
# order() returns the index permutation that sorts the vector
order(v1, decreasing = TRUE) # 3 2 4 1
v1[order(v1, decreasing = TRUE)]
# c b d a
# 12 10 6 5
score <- c(95, 96, 85, 98, 88, 90)
rev(score) # 90 88 98 85 96 95
score[length(score)] # 90 (last element)
tail(score, n = 1) # 90 (last element)
rev(tail(score, n = 3)) # 90 88 98 (last three elements, newest first)
# Vector arithmetic
p0 <- c(0, 0)
p1 <- c(1, 2)
p2 <- c(2, 1)
# Element-wise sum
p3 <- p1 + p2 # 3 3
# Scalar multiplication
p4 <- 1.5 * p3 # 4.5 4.5
# Projection of p1 onto p2: (p1 . p2) / (p2 . p2) * p2.
# The denominator must be the squared length of p2; dividing by p1 . p2
# would cancel the numerator and always return p2 itself.
p1_on_p2 <- sum(p1 * p2) / sum(p2 * p2) * p2 # 1.6 0.8
# Creating a factor
gender <- c("male", "male", "female", "female")
typeof(gender)
# [1] "character"
gender
# [1] "male" "male" "female" "female"
# factor() stores integer codes plus a table of level labels
gender <- factor(gender)
typeof(gender)
# [1] "integer"
gender
# [1] male male female female
# Levels: female male
# Operating on a factor
gender <- c("male", "male", "female", "female")
gender <- factor(gender)
# Subsetting keeps the full set of levels
gender[c(1, 2:3)]
# [1] male male female
# Levels: female male
nlevels(gender) # 2
levels(gender) # [1] "female" "male"
# Only existing levels ("female", "male") can be assigned; any other value
# becomes NA with a warning.
gender[1] <- "female"
gender
# [1] female male female female
# Levels: female male
# Defining a factor with explicit levels
gender <- c("male", "male", "female", "female")
# Declaring levels up front fixes their order and allows values that do
# not yet occur in the data to be assigned later.
gender <- factor(gender, levels = c("male", "female", "shemale"))
gender[1] <- "shemale"
gender
# [1] shemale male female female
# Levels: male female shemale
# What a factor really is
gender <- c("male", "male", "female", "female")
gender <- factor(gender)
# as.numeric() returns the internal integer codes, not the labels
as.numeric(gender)
# [1] 2 2 1 1
as.character(gender)
# [1] "male" "male" "female" "female"
number_factor <- c(10, 20, 40, 20, 30, 10, 20)
number_factor <- factor(number_factor)
# Codes index into levels 10, 20, 30, 40, so these are NOT the original values
as.numeric(number_factor) # [1] 1 2 4 2 3 1 2
# Correct ways to average a numeric-looking factor: recover the labels first
mean(as.numeric(as.character(number_factor))) # [1] 21.42857
mean(as.numeric(levels(number_factor)[number_factor])) # [1] 21.42857
# Creating ordered factors
score <- factor(c("優", "良", "中", "差", "優", "良", "中"))
# Not meaningful here: score[1] < score[2] on an unordered factor gives a
# warning and returns NA.
score <- factor(c("優", "良", "中", "差", "優", "良", "中"), ordered = TRUE)
# Without explicit levels the order is the sorted order of the labels, which
# depends on the locale. The original notes recorded TRUE for this comparison.
score[1] < score[2]
# Give the levels explicitly, lowest first, to get the intended ranking
score <- factor(
  c("優", "良", "中", "差", "優", "良", "中"),
  ordered = TRUE,
  levels = c("差", "中", "良", "優")
)
score[1] < score[2] # FALSE: score[1] is 優, which ranks above 良
# Binning existing data
# Convert percentage scores into a five-level grade
score <- c(94, 87, 92, 91, 85, 92)
score_factor_5 <- cut(
  score,
  breaks = c(0, (6:10) * 10), # 0 60 70 80 90 100
  # With right = FALSE the bins are left-closed, [a, b), and
  # include.lowest = TRUE closes the last bin so that 100 is included.
  include.lowest = TRUE,
  right = FALSE,
  ordered_result = TRUE,
  labels = c("不及格", "及格", "中", "良", "優")
)
score_factor_5
# [1] 優 良 優 優 良 優
# Levels: 不及格 < 及格 < 中 < 良 < 優
# Source: R language notes, www.bioinfo.online