关联规则-R语言实现

来源:互联网 时间:1970-01-01

关联规则

关联规则

shangfr

2015年10月29日


本文旨在演示R语言arules包的关联规则用法,以及利用arulesViz包对结果进行可视化。

关联规则是形如X→Y的蕴涵式,其中,X和Y分别称为关联规则的先导(antecedent或left-hand-side, LHS)和后继(consequent或right-hand-side, RHS)。关联规则X→Y用支持度(support)和置信度(confidence)两个指标来度量。For more details see 关联规则.

R语言arules包提供了有效处理稀疏二元数据的数据结构,而且提供函数执行Apriori和Eclat算法,用于挖掘频繁项集、最大频繁项集、闭频繁项集和关联规则,详见arules包的官方文档。

蘑菇数据data下载


r语言代码

library(arules)

## Loading required package: Matrix

##

## Attaching package: 'arules'

##

## The following objects are masked from 'package:base':

##

## %in%, abbreviate, write

# Load the mushroom data set from a file chosen interactively. The file has
# no header row. Strings are read as factors explicitly: the coercion to
# "transactions" below expects factor (or logical) columns, and since
# R 4.0.0 read.csv() no longer converts strings to factors by default.
data <- read.csv(file.choose(), header = FALSE, stringsAsFactors = TRUE)

# Convert the data frame to the arules "transactions" format; each
# column/value pair becomes one item (e.g. "V1=e").
trans <- as(data, "transactions")

# inspect(trans)  # uncomment to print the transactions

# Plot the item matrix of the first 50 transactions.
image(trans[1:50])

# Item frequency bar plots: items with support of at least 0.5, then the
# 10 most frequent items drawn as horizontal bars.
itemFrequencyPlot(trans, support = 0.5)

itemFrequencyPlot(trans, topN = 10, horiz = TRUE)

# Number of items in each transaction.
basketSize <- size(trans)

summary(basketSize)

## Min. 1st Qu. Median Mean 3rd Qu. Max.

## 23 23 23 23 23 23

# Relative frequency (support) of every item.
itemFreq <- itemFrequency(trans)

# Absolute number of transactions containing each item. Asking arules for
# the counts directly avoids reconstructing them from the relative
# frequencies, which is roundabout and subject to floating-point noise.
itemCount <- itemFrequency(trans, type = "absolute")

summary(itemCount)

# Items ordered from most to least frequent.
orderedItem <- sort(itemCount, decreasing = TRUE)

# Ten most frequent items; head() returns fewer entries instead of NA
# padding when there are fewer than 10 items.
head(orderedItem, 10)

## V17=p V18=w V7=f V19=o V8=c V9=b V13=s V14=s V5=f V11=t

## 8124 7924 7914 7488 6812 5612 5176 4936 4748 4608

# Mine association rules with Apriori: minimum support 0.3 and
# minimum confidence 1.
rules <- apriori(
  trans,
  parameter = list(support = 0.3, confidence = 1)
)

##

## Parameter specification:

## confidence minval smax arem aval originalSupport support minlen maxlen

## 1 0.1 1 none FALSE TRUE 0.3 1 10

## target ext

## rules FALSE

##

## Algorithmic control:

## filter tree heap memopt load sort verbose

## 0.1 TRUE TRUE FALSE TRUE 2 TRUE

##

## apriori - find association rules with the apriori algorithm

## version 4.21 (2004.05.09) (c) 1996-2004 Christian Borgelt

## set item appearances ...[0 item(s)] done [0.00s].

## set transactions ...[119 item(s), 8124 transaction(s)] done [0.00s].

## sorting and recoding items ... [28 item(s)] done [0.00s].

## creating transaction tree ... done [0.02s].

## checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.00s].

## writing ... [4316 rule(s)] done [0.00s].

## creating S4 object ... done [0.00s].

# Summarise the mined rule set (rule-length distribution, quality measures).
summary(rules)

## set of 4316 rules

##

## rule length distribution (lhs + rhs):sizes

## 1 2 3 4 5 6 7 8 9

## 1 42 293 832 1244 1107 594 179 24

##

## Min. 1st Qu. Median Mean 3rd Qu. Max.

## 1.00 4.00 5.00 5.32 6.00 9.00

##

## summary of quality measures:

## support confidence lift

## Min. :0.3003 Min. :1 Min. :1.000

## 1st Qu.:0.3112 1st Qu.:1 1st Qu.:1.000

## Median :0.3299 Median :1 Median :1.025

## Mean :0.3540 Mean :1 Mean :1.141

## 3rd Qu.:0.3712 3rd Qu.:1 3rd Qu.:1.027

## Max. :1.0000 Max. :1 Max. :2.927

##

## mining info:

## data ntransactions support confidence

## trans 8124 0.3 1

# Print the first 10 rules together with their quality measures.
inspect(rules[1:10])

## lhs rhs support confidence lift

## 1 {} => {V17=p} 1.0000000 1 1.000000

## 2 {V12=?} => {V17=p} 0.3052683 1 1.000000

## 3 {V9=n} => {V19=o} 0.3092073 1 1.084936

## 4 {V9=n} => {V7=f} 0.3092073 1 1.026535

## 5 {V9=n} => {V17=p} 0.3092073 1 1.000000

## 6 {V3=s} => {V17=p} 0.3146233 1 1.000000

## 7 {V20=e} => {V7=f} 0.3417036 1 1.026535

## 8 {V20=e} => {V17=p} 0.3417036 1 1.000000

## 9 {V23=d} => {V18=w} 0.3874938 1 1.025240

## 10 {V23=d} => {V17=p} 0.3874938 1 1.000000

# Keep only the rules whose right-hand side contains the item "V1=e"
# (presumably the "edible" class label -- confirm against the data set).
edible <- subset(rules, rhs %in% "V1=e")

# Show the first 10 of the selected rules.
inspect(edible[seq_len(10)])

## lhs rhs support confidence lift

## 126 {V6=n,V11=t} => {V1=e} 0.3072378 1 1.930608

## 578 {V6=n,V9=b,V11=t} => {V1=e} 0.3072378 1 1.930608

## 581 {V6=n,V11=t,V19=o} => {V1=e} 0.3072378 1 1.930608

## 583 {V6=n,V7=f,V11=t} => {V1=e} 0.3072378 1 1.930608

## 585 {V6=n,V11=t,V18=w} => {V1=e} 0.3072378 1 1.930608

## 587 {V6=n,V11=t,V17=p} => {V1=e} 0.3072378 1 1.930608

## 590 {V6=n,V9=b,V19=o} => {V1=e} 0.3308715 1 1.930608

## 1595 {V6=n,V9=b,V11=t,V19=o} => {V1=e} 0.3072378 1 1.930608

## 1599 {V6=n,V7=f,V9=b,V11=t} => {V1=e} 0.3072378 1 1.930608

## 1603 {V6=n,V9=b,V11=t,V18=w} => {V1=e} 0.3072378 1 1.930608

# Save the rules: write them to a quoted, comma-separated file without
# row names.
write(
  rules,
  file = "rules.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Also coerce the rule set to a plain data frame.
rules_df <- as(rules, "data.frame")


利用arulesViz对结果进行可视化

# Visualisation

# NOTE(review): grid is not called directly in this script; presumably it is
# loaded as a dependency of arulesViz -- confirm whether it is still needed.
library(grid)

# Provides brewer.pal(), used to build the colour palettes for the plots.
library(RColorBrewer)

# Loaded for the rule plots drawn at the end of the script.
library(arulesViz)

##

## Attaching package: 'arulesViz'

##

## The following object is masked from 'package:arules':

##

## abbreviate

##

## The following object is masked from 'package:base':

##

## abbreviate

# Mine a much smaller rule set for plotting: minimum support 0.8 and
# minimum confidence 1.
mushroom.rules <- apriori(
  trans,
  parameter = list(support = 0.8, confidence = 1)
)

##

## Parameter specification:

## confidence minval smax arem aval originalSupport support minlen maxlen

## 1 0.1 1 none FALSE TRUE 0.8 1 10

## target ext

## rules FALSE

##

## Algorithmic control:

## filter tree heap memopt load sort verbose

## 0.1 TRUE TRUE FALSE TRUE 2 TRUE

##

## apriori - find association rules with the apriori algorithm

## version 4.21 (2004.05.09) (c) 1996-2004 Christian Borgelt

## set item appearances ...[0 item(s)] done [0.00s].

## set transactions ...[119 item(s), 8124 transaction(s)] done [0.02s].

## sorting and recoding items ... [5 item(s)] done [0.00s].

## creating transaction tree ... done [0.00s].

## checking subsets of size 1 2 3 4 done [0.00s].

## writing ... [16 rule(s)] done [0.00s].

## creating S4 object ... done [0.00s].

# Default plot of the high-support rules, shaded by lift, with jittered
# points and a reversed "Greens" palette (shades 9 down to 4).
plot(
  mushroom.rules,
  shading = "lift",
  control = list(jitter = 2, col = brewer.pal(9, "Greens")[9:4])
)

# Grouped plot of the same rules, using up to k = 100 groups and the
# same palette.
plot(
  mushroom.rules,
  method = "grouped",
  control = list(k = 100, col = brewer.pal(9, "Greens")[9:4])
)

# Graph plot of the first 20 rules that conclude "V1=e": measure is
# confidence, shading is lift.
plot(
  edible[seq_len(20)],
  method = "graph",
  measure = "confidence",
  shading = "lift",
  control = list(type = "items")
)

# Parallel-coordinates plot of all rules that conclude "V1=e", with
# reordering enabled.
plot(
  edible,
  method = "paracoord",
  control = list(reorder = TRUE)
)



蘑菇数据的决策树分类介绍详见

反馈与建议

Author:shangfr
邮箱:shangfr@foxmail.com


相关阅读:
Top