huichen · lijingpeng · Dec 1, 2015 · Dec 1, 2015 · Dec 3, 2015 · Dec 3, 2015
diff --git a/core/indexer.go b/core/indexer.go
@@ -1,6 +1,7 @@
 package core
 
 import (
+	"github.com/AlasdairF/Sort/Uint64"
 	"github.com/huichen/wukong/types"
 	"github.com/huichen/wukong/utils"
 	"log"
@@ -131,7 +132,7 @@ func (indexer *Indexer) AddDocument(document *types.DocumentIndex) {
 // 查找包含全部搜索键(AND操作)的文档
 // 当docIds不为nil时仅从docIds指定的文档中查找
 func (indexer *Indexer) Lookup(
-	tokens []string, labels []string, docIds map[uint64]bool, countDocsOnly bool) (docs []types.IndexedDocument, numDocs int) {
+	tokens []string, labels []string, docIds map[uint64]bool, countDocsOnly bool, logicExpression ...types.LogicExpression) (docs []types.IndexedDocument, numDocs int) {
 	if indexer.initialized == false {
 		log.Fatal("索引器尚未初始化")
 	}
@@ -141,6 +142,12 @@ func (indexer *Indexer) Lookup(
 	}
 	numDocs = 0
 
+	if logicExpression != nil && len(logicExpression) > 0 && (len(logicExpression[0].MustLabels) > 0 || len(logicExpression[0].ShouldLabels) > 0) &&
+		len(logicExpression[0].NotInLabels) >= 0 {
+		docs, numDocs = indexer.LogicLookup(docIds, countDocsOnly, logicExpression[0])
+		return
+	}
+
 	// 合并关键词和标签为搜索键
 	keywords := make([]string, len(tokens)+len(labels))
 	copy(keywords, tokens)
@@ -176,9 +183,6 @@ func (indexer *Indexer) Lookup(
 	for ; indexPointers[0] >= 0; indexPointers[0]-- {
 		// 以第一个搜索键出现的文档作为基准，并遍历其他搜索键搜索同一文档
 		baseDocId := indexer.getDocId(table[0], indexPointers[0])
-		if _, ok := indexer.tableLock.docs[baseDocId]; !ok {
-			continue
-		}
 
 		if docIds != nil {
 			_, found := docIds[baseDocId]
@@ -211,7 +215,8 @@ func (indexer *Indexer) Lookup(
 			}
 		}
 
-		if found {
+		_, ok := indexer.tableLock.docs[baseDocId]
+		if found && ok {
 			indexedDoc := types.IndexedDocument{}
 
 			// 当为LocationsIndex时计算关键词紧邻距离
@@ -420,6 +425,171 @@ func (indexer *Indexer) RemoveDoc(docId uint64) {
 
 	indexer.tableLock.Lock()
 	delete(indexer.tableLock.docs, docId)
-	indexer.numDocuments--
 	indexer.tableLock.Unlock()
 }
+
+func (indexer *Indexer) LogicLookup(docIds map[uint64]bool, countDocsOnly bool, LogicExpression types.LogicExpression) (docs []types.IndexedDocument, numDocs int) {
+	indexer.tableLock.RLock()
+	defer indexer.tableLock.RUnlock()
+	// 有效性检查, 不允许只出现逻辑非检索, 也不允许与或非都不存在
+	if len(LogicExpression.MustLabels) == 0 && len(LogicExpression.ShouldLabels) == 0 &&
+		len(LogicExpression.NotInLabels) >= 0 {
+		return
+	}
+
+	// MustTable中的搜索键检查
+	// 如果存在与搜索键， 则要求所有的与搜索键都有对应的反向表
+	MustTable := make([]*KeywordIndices, 0)
+	if len(LogicExpression.MustLabels) > 0 {
+		for _, keyword := range LogicExpression.MustLabels {
+			indices, found := indexer.tableLock.table[keyword]
+			if !found {
+				return
+			} else {
+				MustTable = append(MustTable, indices)
+			}
+		}
+	}
+
+	// 逻辑或搜索键检查
+	// 1. 如果存在逻辑或搜索键， 则至少有一个存在反向表
+	// 2. 逻辑或和逻辑与之间是与关系
+	ShouldTable := make([]*KeywordIndices, 0)
+	if len(LogicExpression.ShouldLabels) > 0 {
+		for _, keyword := range LogicExpression.ShouldLabels {
+			indices, found := indexer.tableLock.table[keyword]
+			if found {
+				ShouldTable = append(ShouldTable, indices)
+			}
+		}
+		if len(ShouldTable) == 0 {
+			// 如果存在逻辑或搜索键， 但是对应的反向表全部为空， 则返回
+			return
+		}
+	}
+
+	// 逻辑非中的搜索键检查
+	// 可以不存在逻辑非搜索（NotInTable为空）， 允许逻辑非搜索键对应的反向表为空
+	NotInTable := make([]*KeywordIndices, 0)
+	for _, keyword := range LogicExpression.NotInLabels {
+		indices, found := indexer.tableLock.table[keyword]
+		if found {
+			NotInTable = append(NotInTable, indices)
+		}
+	}
+
+	// 开始检索
+	numDocs = 0
+	if len(LogicExpression.MustLabels) > 0 {
+		// 如果存在逻辑与检索
+		for idx := indexer.getIndexLength(MustTable[0]) - 1; idx >= 0; idx-- {
+			baseDocId := indexer.getDocId(MustTable[0], idx)
+			if docIds != nil {
+				_, found := docIds[baseDocId]
+				if !found {
+					continue
+				}
+			}
+
+			mustFound := indexer.findInMustTable(MustTable[1:], baseDocId)
+			shouldFound := indexer.findInShouldTable(ShouldTable, baseDocId)
+			notInFound := indexer.findInNotInTable(NotInTable, baseDocId)
+
+			if mustFound && shouldFound && !notInFound {
+				indexedDoc := types.IndexedDocument{}
+				indexedDoc.DocId = baseDocId
+				if !countDocsOnly {
+					docs = append(docs, indexedDoc)
+				}
+				numDocs++
+			}
+		}
+	} else {
+		// 不存在逻辑与检索， 则必须存在逻辑或检索
+		// 这时进行求并集操作
+		docs, numDocs = indexer.unionTable(ShouldTable, NotInTable, countDocsOnly)
+	}
+	return
+}
+
+// 在逻辑与反向表中对docid进行查找， 若每个反向表都找到， 则返回true， 有一个找不到则返回false
+func (indexer *Indexer) findInMustTable(table []*KeywordIndices, docId uint64) bool {
+	for i := 0; i < len(table); i++ {
+		_, foundDocId := indexer.searchIndex(table[i],
+			0, indexer.getIndexLength(table[i])-1, docId)
+		if !foundDocId {
+			return false
+		}
+	}
+	return true
+}
+
+// 在逻辑或反向表中对docid进行查找， 若有一个找到则返回true， 都找不到则返回false
+// 如果table为空， 则返回true
+func (indexer *Indexer) findInShouldTable(table []*KeywordIndices, docId uint64) bool {
+	for i := 0; i < len(table); i++ {
+		_, foundDocId := indexer.searchIndex(table[i],
+			0, indexer.getIndexLength(table[i])-1, docId)
+		if foundDocId {
+			return true
+		}
+	}
+
+	if len(table) == 0 {
+		return true
+	} else {
+		return false
+	}
+}
+
+// 在逻辑非反向表中对docid进行查找， 若有一个找到则返回true， 都找不到则返回false
+// 如果table为空， 则返回false
+func (indexer *Indexer) findInNotInTable(table []*KeywordIndices, docId uint64) bool {
+	for i := 0; i < len(table); i++ {
+		_, foundDocId := indexer.searchIndex(table[i],
+			0, indexer.getIndexLength(table[i])-1, docId)
+		if foundDocId {
+			return true
+		}
+	}
+	return false
+}
+
+// 如果不存在与逻辑检索， 则需要对逻辑或反向表求并集
+// 先求差集再求并集， 可以减小内存占用
+// docid要保序
+func (indexer *Indexer) unionTable(table []*KeywordIndices, notInTable []*KeywordIndices, countDocsOnly bool) (
+	docs []types.IndexedDocument, numDocs int) {
+	docIds := make([]uint64, 0)
+	// 求并集
+	for i := 0; i < len(table); i++ {
+		for _, docid := range table[i].docIds {
+			if !indexer.findInNotInTable(notInTable, docid) {
+				found := false
+				for _, v := range docIds {
+					if v == docid {
+						found = true
+						break
+					}
+				}
+				if !found {
+					docIds = append(docIds, docid)
+				}
+			}
+		}
+	}
+	// 排序
+	sortUint64.StableDesc(docIds)
+
+	numDocs = 0
+	for _, doc := range docIds {
+		indexedDoc := types.IndexedDocument{}
+		indexedDoc.DocId = doc
+		if !countDocsOnly {
+			docs = append(docs, indexedDoc)
+		}
+		numDocs++
+	}
+
+	return
+}