挖掘DBLP作者合作关系,FP-Growth算法实践(5):挖掘研究者合作
发布时间:2021-05-25 23:36:17 所属栏目:大数据 来源:网络整理
导读:本文用到的就是频繁项集挖掘中的FP-Growth算法。 先产生headerTable: 数据结构(其实也是调了好几次代码才确定的,因为一开始总有想不到的东西):entry: {authorName: frequence,firstChildPointer,startYear,endYear} def CreateHeaderTable(tranDB……
def FindParentTreeNodes(baseTreeNode):
    """Return the author names on the path from ``baseTreeNode`` up to the root.

    The walk follows ``parentPointer`` links and stops at the ROOT node, which
    is recognised by its ``parentPointer`` being ``None``; the ROOT itself is
    never added to the result.

    Args:
        baseTreeNode: Node to start from. It must expose ``authorName`` and
            ``parentPointer``. ``None`` is accepted and yields an empty list.

    Returns:
        A list of author names ordered from ``baseTreeNode`` (index 0) towards
        the root. The list is empty when ``baseTreeNode`` is the ROOT or None.
    """
    parentTreeNodes = []
    currentNode = baseTreeNode
    # Guard against a None start node so callers walking an empty chain do not
    # crash with AttributeError.
    while currentNode is not None and currentNode.parentPointer is not None:
        parentTreeNodes.append(currentNode.authorName)
        currentNode = currentNode.parentPointer
    return parentTreeNodes
def FindCondAuthorDB(firstChildPointerTreeNode):
    """Build the conditional pattern base for one author.

    Starting at ``firstChildPointerTreeNode``, the function follows the
    ``brotherPointer`` chain. For every node on that chain it collects the
    author names above the node (via ``FindParentTreeNodes``) and records them
    with the node's ``frequence``.

    Args:
        firstChildPointerTreeNode: First node of the chain, or ``None`` for an
            empty chain.

    Returns:
        dict mapping ``frozenset(prefix author names)`` to a frequency. Nodes
        that sit directly under the ROOT have no prefix and contribute nothing.
    """
    condAuthorDB = {}  # entry: {frozenset(authorListSet): frequence}
    tempTreeNode = firstChildPointerTreeNode
    while tempTreeNode is not None:
        parentTreeNodes = FindParentTreeNodes(tempTreeNode)
        if len(parentTreeNodes) > 1:
            # parentTreeNodes[0] is the node itself, so drop it to keep only
            # the prefix path.
            prefixAuthors = frozenset(parentTreeNodes[1:])
            # Accumulate rather than overwrite: the frozenset key discards
            # path order, so two chain nodes may map to the same key and their
            # counts must add up.
            condAuthorDB[prefixAuthors] = (
                condAuthorDB.get(prefixAuthors, 0) + tempTreeNode.frequence
            )
        # Must run on every iteration, otherwise the loop never terminates.
        tempTreeNode = tempTreeNode.brotherPointer
    return condAuthorDB
def CreateCondHeaderTable(condAuthorDB, minSupport=1):
    """Build the header table for a conditional pattern base.

    Args:
        condAuthorDB: dict mapping an iterable of author names (a frozenset in
            this file) to the frequency of that conditional transaction.
        minSupport: Authors whose summed frequency is below this value are
            removed from the table.

    Returns:
        dict mapping each surviving author name to the two-element list
        ``[frequence, None]``; the second slot is the firstChildPointer
        placeholder. The dict is empty when no author reaches ``minSupport``.
    """
    condHeaderTable = {}  # entry: {authorName: [frequence, firstChildPointer]}
    for authorListSet, frequence in condAuthorDB.items():
        # Debug trace kept from the original; written so that it prints the
        # same text under both Python 2 and Python 3.
        print("cond trans " + "==" * 20)
        for author in authorListSet:
            if author in condHeaderTable:
                condHeaderTable[author][0] += frequence
            else:
                condHeaderTable[author] = [frequence, None]
    # Iterate over a snapshot of the keys: deleting while iterating the live
    # dict raises RuntimeError on Python 3.
    for author in list(condHeaderTable):
        if condHeaderTable[author][0] < minSupport:
            del condHeaderTable[author]
    return condHeaderTable
'''
headerTable={} #entry: {authorName: [frequence,firstChildPointer,startYear,endYear]}
if we want to call MineCondTree(), [arguments truncated in the original post] ...,brotherPointer=None)
'''
def MineTree(treeRoot, headerTable, minSupport=1, baseFreqAuthorSet=None,
             finalFreqAuthorPattDict=None):
    """Recursively mine frequent author sets from a tree and its header table.

    For every author in ``headerTable`` the function records the pattern
    ``baseFreqAuthorSet + {author}`` with that author's frequency, builds the
    author's conditional pattern base, header table and tree, and then recurses
    on that conditional tree.

    ``headerTable`` is the second positional parameter: the body reads it and
    the recursive call passes it in that position.

    Args:
        treeRoot: Root of the tree being mined. It is not read here; it is
            accepted so the recursive call shape stays uniform.
        headerTable: dict mapping author name to a list whose index 0 is the
            frequency and index 1 is the firstChildPointer node.
        minSupport: Minimum frequency, forwarded to ``CreateCondHeaderTable``.
        baseFreqAuthorSet: Authors already fixed on this recursion branch.
            Defaults to an empty set. It is never mutated.
        finalFreqAuthorPattDict: dict that receives
            ``{frozenset(authors): frequence}``. A fresh dict is created when
            omitted.

    Returns:
        The dict holding all mined patterns. This is the same object as
        ``finalFreqAuthorPattDict`` when one was supplied.
    """
    # Avoid mutable default arguments: a shared default dict would leak
    # patterns from one top-level call into the next.
    if baseFreqAuthorSet is None:
        baseFreqAuthorSet = set()
    if finalFreqAuthorPattDict is None:
        finalFreqAuthorPattDict = {}

    # Start from the bottom of headerTable, i.e. the least frequent author.
    # The author name breaks ties so the order is deterministic. Processing
    # order does not change which patterns are found.
    sortedAuthorsList = sorted(
        headerTable, key=lambda author: (headerTable[author][0], author)
    )
    for baseAuthor in sortedAuthorsList:
        newFreqAuthorSet = set(baseFreqAuthorSet)
        newFreqAuthorSet.add(baseAuthor)
        finalFreqAuthorPattDict[frozenset(newFreqAuthorSet)] = headerTable[baseAuthor][0]
        # [1] is the firstChildPointer
        condAuthorDB = FindCondAuthorDB(headerTable[baseAuthor][1])
        condHeaderTable = CreateCondHeaderTable(condAuthorDB, minSupport)
        # CreateTree is defined elsewhere in the file; presumably it builds
        # the conditional tree and links it into condHeaderTable. Confirm
        # against its definition.
        condTreeRoot = CreateTree(condAuthorDB, condHeaderTable)
        # An empty or None header table has nothing left to mine.
        if condHeaderTable:
            MineTree(condTreeRoot, condHeaderTable, minSupport,
                     newFreqAuthorSet, finalFreqAuthorPattDict)
    return finalFreqAuthorPattDict
(编辑:黄山站长网) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与站长联系,删除相关内容!
站长推荐


