Lucene.NET:如何使用BlockJoinQuery
本文关键字:何使用 BlockJoinQuery NET Lucene | 更新日期: 2023-09-27 18:12:33
我正在尝试用 Lucene.NET 4.8 做关系搜索(实际上我是用最新的源代码自行编译的)。项目引用了 Lucene.Net、Lucene.Net.Analysis.Common、Lucene.Net.Grouping、Lucene.Net.Join 和 Lucene.Net.QueryParser。
问题是:我没有得到任何结果。在下面的例子中,我把 blog 视为 parent,把 comments 视为 children。我想找到内容包含 first、并且有一条评论包含 like 的博客(也就是 Id 为 1 的那篇)。
如何修复这个示例代码?
// Question's repro: indexes two comment documents and two blog documents into a
// fresh on-disk index, dumps the index contents, then tries to find blogs whose
// BlogContent contains "first" and that have a comment containing "like".
// dbFolder: directory under which the index folder is created.
// NOTE(review): SOURCE shows no closing brace for this method.
static void BlockJoinQueryTest(string dbFolder)
{
var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
config.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE_OR_APPEND);
// NOTE(review): the '' inside this literal looks like a backslash path separator
// that was mangled when the page was extracted - confirm the intended path.
var indexPathBlog = dbFolder + "''blog_db";
// Start from an empty index directory on every run.
if (System.IO.Directory.Exists(indexPathBlog))
{
System.IO.Directory.Delete(indexPathBlog, true);
}
System.IO.Directory.CreateDirectory(indexPathBlog);
var indexDirectoryBlog = FSDirectory.Open(new System.IO.DirectoryInfo(indexPathBlog));
var indexWriterBlog = new IndexWriter(indexDirectoryBlog, config);
// Two comments for blog 1, then two blogs. Every document is added with its own
// AddDocument call.
// NOTE(review): ToParentBlockJoinQuery presumably needs each children+parent group
// indexed as a single block (AddDocuments, parent last) - verify against the
// Lucene.Net.Join documentation.
Document comment = new Document();
comment.Add(new TextField("BlogId", "1", Field.Store.YES));
comment.Add(new TextField("CommentContent", "I like your first blog!", Field.Store.YES));
comment.Add(new TextField("Type", "comment", Field.Store.YES));
comment.Add(new TextField("Note", "child", Field.Store.YES));
indexWriterBlog.AddDocument(comment);
comment = new Document();
comment.Add(new TextField("BlogId", "1", Field.Store.YES));
comment.Add(new TextField("CommentContent", "Not that great.", Field.Store.YES));
comment.Add(new TextField("Type", "comment", Field.Store.YES));
comment.Add(new TextField("Note", "child", Field.Store.YES));
indexWriterBlog.AddDocument(comment);
Document blog = new Document();
blog.Add(new TextField("Id", "1", Field.Store.YES));
blog.Add(new TextField("BlogContent", "Content of first blog", Field.Store.YES));
blog.Add(new TextField("Type", "blog", Field.Store.YES));
blog.Add(new TextField("Note", "parent", Field.Store.YES));
indexWriterBlog.AddDocument(blog);
blog = new Document();
blog.Add(new TextField("Id", "2", Field.Store.YES));
blog.Add(new TextField("BlogContent", "This is the second blog!", Field.Store.YES));
blog.Add(new TextField("Type", "blog", Field.Store.YES));
blog.Add(new TextField("Note", "parent", Field.Store.YES));
indexWriterBlog.AddDocument(blog);
indexWriterBlog.Commit();
// Open a searcher over the committed index. Neither the writer nor this reader is
// disposed anywhere in the visible code.
var searcher = new IndexSearcher(DirectoryReader.Open(indexDirectoryBlog));
// Debug dump: print the stored fields of every document id below MaxDoc.
Console.WriteLine("Begin content enumeration:");
for (int i = 0; i < searcher.IndexReader.MaxDoc; i++)
{
var doc = searcher.IndexReader.Document(i);
Console.WriteLine("Document " + i + ": " + doc.ToString());
}
Console.WriteLine("End content enumeration.");
// Parent filter: documents whose Type field contains the term "blog".
Filter blogs = new CachingWrapperFilter(
new QueryWrapperFilter(
new TermQuery(
new Term("Type", "blog"))));
// Child query: comments whose CommentContent contains the term "like".
BooleanQuery commentQuery = new BooleanQuery();
commentQuery.Add(new TermQuery(new Term("CommentContent", "like")), BooleanClause.Occur.MUST);
//commentQuery.Add(new TermQuery(new Term("BlogId", "1")), BooleanClause.Occur.MUST);
// Join query mapping matching comments to their parent blog.
var commentJoinQuery = new ToParentBlockJoinQuery(
commentQuery,
blogs,
ScoreMode.None);
// NOTE(review): the second clause adds commentQuery, not commentJoinQuery, so the
// join query built above is never part of the query that is searched, although
// GetTopGroups is later asked about commentJoinQuery. As written, a single document
// must match both BlogContent:first and CommentContent:like. Likely unintended.
BooleanQuery query = new BooleanQuery();
query.Add(new TermQuery(new Term("BlogContent", "first")), BooleanClause.Occur.MUST);
query.Add(commentQuery, BooleanClause.Occur.MUST);
var c = new ToParentBlockJoinCollector(
Sort.RELEVANCE, // sort
10, // numHits
true, // trackScores
false // trackMaxScore
);
searcher.Search(query, c);
int maxDocsPerGroup = 10;
// Ask the collector for the groups produced by commentJoinQuery.
var hits = c.GetTopGroups(
commentJoinQuery,
Sort.INDEXORDER,
0, // offset
maxDocsPerGroup, // maxDocsPerGroup
0, // withinGroupOffset
true // fillSortFields
);
if (hits != null)
{
Console.WriteLine("Found " + hits.TotalGroupCount + " groups:");
for (int i = 0; i < hits.TotalGroupCount; i++)
{
var group = hits.Groups[i];
Console.WriteLine("Group " + i + ": " + group.ToString());
for (int j = 0; j < group.TotalHits && j < maxDocsPerGroup; j++)
{
Document doc = searcher.Doc(group.ScoreDocs[j].Doc);
// NOTE(review): the label prints the group index i, not the hit index j.
Console.WriteLine("Hit " + i + ": " + doc.ToString());
}
}
}
else
{
Console.WriteLine("No hits.");
}
Console.WriteLine("Done.");
难道不需要把这些 Document 作为一个 IEnumerable 一次性添加(AddDocuments),让它们被索引成一个"块"(block)吗?
http://blog.mikemccandless.com/2012/01/searching-relational-content-with.html 中有更详细的解释。
编辑:我试着用下面的代码做这件事,这似乎也不起作用,如果有人能透露任何信息,我会很感激的?
// Second attempt: each blog and its comment are collected into a list and each
// list is indexed with a single AddDocuments call; the join query is then searched
// directly. The surrounding text reports that this version does not work either.
var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
config.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE_OR_APPEND);
// NOTE(review): the '' inside this literal looks like a backslash path separator
// that was mangled when the page was extracted - confirm the intended path.
var indexPathBlog = "D:''Test";
// Start from an empty index directory on every run.
if (System.IO.Directory.Exists(indexPathBlog))
{
System.IO.Directory.Delete(indexPathBlog, true);
}
System.IO.Directory.CreateDirectory(indexPathBlog);
var indexDirectoryBlog = FSDirectory.Open(new System.IO.DirectoryInfo(indexPathBlog));
var indexWriterBlog = new IndexWriter(indexDirectoryBlog, config);
// One list per block: "one" holds blog 1 and its comment, "two" holds blog 2 and
// its comment.
var one = new List<Document>();
var two = new List<Document>();
// NOTE(review): in both lists the parent (blog) is added before its child comment;
// the block join presumably expects the parent to be the last document of the
// block - verify against the Lucene.Net.Join documentation.
var blogOne = new Document();
blogOne.Add(new TextField("Id", "1", Field.Store.YES));
blogOne.Add(new TextField("BlogContent", "Content of first blog", Field.Store.YES));
blogOne.Add(new TextField("Type", "blog", Field.Store.YES));
blogOne.Add(new TextField("Note", "parent", Field.Store.YES));
one.Add(blogOne);
Document commentOne = new Document();
commentOne.Add(new TextField("BlogId", "1", Field.Store.YES));
commentOne.Add(new TextField("CommentContent", "I like your first blog!", Field.Store.YES));
commentOne.Add(new TextField("Type", "comment", Field.Store.YES));
commentOne.Add(new TextField("Note", "child", Field.Store.YES));
one.Add(commentOne);
Document blogTwo = new Document();
blogTwo.Add(new TextField("Id", "2", Field.Store.YES));
blogTwo.Add(new TextField("BlogContent", "This is the second blog!", Field.Store.YES));
blogTwo.Add(new TextField("Type", "blog", Field.Store.YES));
blogTwo.Add(new TextField("Note", "parent", Field.Store.YES));
two.Add(blogTwo);
var commentTwo = new Document();
commentTwo.Add(new TextField("BlogId", "2", Field.Store.YES));
commentTwo.Add(new TextField("CommentContent", "Not that great.", Field.Store.YES));
commentTwo.Add(new TextField("Type", "comment", Field.Store.YES));
commentTwo.Add(new TextField("Note", "child", Field.Store.YES));
two.Add(commentTwo);
indexWriterBlog.AddDocuments(one);
indexWriterBlog.AddDocuments(two);
indexWriterBlog.Commit();
// Neither the writer nor this reader is disposed anywhere in the visible code.
var searcher = new IndexSearcher(DirectoryReader.Open(indexDirectoryBlog));
// NOTE(review): the filter uses the field name "type", while the documents above
// were indexed with a field named "Type" - presumably field names are
// case-sensitive, so this filter would match nothing; confirm.
Filter parentQuery =
new QueryWrapperFilter(
new TermQuery(
new Term("type", "blog")));
// NOTE(review): the whole sentence is passed as a single term. CommentContent was
// indexed as a TextField through StandardAnalyzer, so it is unlikely that any
// single indexed term equals this sentence - verify.
BooleanQuery childQuery = new BooleanQuery();
childQuery.Add(new TermQuery(new Term("CommentContent", "I like your first blog!")), BooleanClause.Occur.MUST);
var commentJoinQuery = new ToParentBlockJoinQuery(
childQuery,
parentQuery,
ScoreMode.None);
// "query" is populated here, but the Search call further down is given
// commentJoinQuery directly, so this BooleanQuery is never used.
BooleanQuery query = new BooleanQuery();
//query.Add(new TermQuery(new Term("Type", "blog")), BooleanClause.Occur.MUST);
query.Add(commentJoinQuery, BooleanClause.Occur.MUST);
var c = new ToParentBlockJoinCollector(
Sort.RELEVANCE, // sort
10, // numHits
false, // trackScores
false // trackMaxScore
);
searcher.Search(commentJoinQuery, c);
int maxDocsPerGroup = 10;
// Ask the collector for the groups produced by commentJoinQuery.
var hits = c.GetTopGroups(
commentJoinQuery,
Sort.INDEXORDER,
0, // offset
maxDocsPerGroup, // maxDocsPerGroup
0, // withinGroupOffset
true // fillSortFields
);
if (hits != null)
{
Console.WriteLine("Found " + hits.TotalGroupCount + " groups:");
for (int i = 0; i < hits.TotalGroupCount; i++)
{
var group = hits.Groups[i];
Console.WriteLine("Group " + i + ": " + group.ToString());
for (int j = 0; j < group.TotalHits && j < maxDocsPerGroup; j++)
{
Document doc = searcher.Doc(group.ScoreDocs[j].Doc);
// NOTE(review): the label prints the group index i, not the hit index j.
Console.WriteLine("Hit " + i + ": " + doc.ToString());
}
}
}
else
{
Console.WriteLine("No hits.");
}
Console.WriteLine("Done.");
我也偶然发现了这个问题,并设法修复了它。
- @Ant 说得对:父文档必须是块中的最后一个文档。
但是代码还有另外两个问题:
- 出于某种原因(抱歉,我不是 Lucene 专家),当 CommentContent 是一个句子("I like your first blog!")并且用 TermQuery 搜索整个句子时,得不到任何结果。我猜这与字段的分析(分词)有关。因此,我把该字段的内容换成了 "blog"。
- 现在 IndexSearcher 似乎能找到结果了,但会抛出一个 System 命名空间下的异常,消息为 "parentFilter must return FixedBitSet; got Lucene.Net.Search.QueryWrapperFilter+DocIdSetAnonymousInnerClassHelper"。参考 lucene.net(GitHub)中的测试用例后,我发现必须把 parentQuery 包装在 FixedBitSetCachingWrapperFilter 中:
// Parent filter selecting documents whose Type field contains the term "blog".
// It is wrapped in FixedBitSetCachingWrapperFilter because the block join reports
// "parentFilter must return FixedBitSet" for a bare QueryWrapperFilter.
var blogTypeQuery = new TermQuery(new Term("Type", "blog"));
Filter parentQuery = new FixedBitSetCachingWrapperFilter(
    new QueryWrapperFilter(blogTypeQuery));
完整代码是:
// Working block-join example.
//
// Indexes two blocks, each consisting of one comment (child) followed by its blog
// (parent), then uses ToParentBlockJoinQuery to find the blogs that have a comment
// whose CommentContent contains the term "blog", and prints each matching group
// together with its matching child documents.
//
// Changes compared with the original listing:
//   * analyzer, directory, writer and reader are disposed deterministically;
//   * the BooleanQuery that was built but never searched has been removed;
//   * result loops are bounded by the arrays actually returned (Groups / ScoreDocs)
//     rather than by TotalGroupCount / TotalHits, which count all matches and can be
//     larger than the returned arrays;
//   * the per-hit label prints the hit index instead of the group index.

// Start from an empty index directory on every run.
var indexPathBlog = Path.Combine(Environment.CurrentDirectory, "index");
if (System.IO.Directory.Exists(indexPathBlog))
{
    System.IO.Directory.Delete(indexPathBlog, true);
}
System.IO.Directory.CreateDirectory(indexPathBlog);

using (var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
using (var indexDirectoryBlog = FSDirectory.Open(new System.IO.DirectoryInfo(indexPathBlog)))
{
    var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
    config.SetOpenMode(OpenMode.CREATE_OR_APPEND);

    using (var indexWriterBlog = new IndexWriter(indexDirectoryBlog, config))
    {
        // Block 1: the comment (child) first, its blog (parent) last.
        var one = new List<Document>();

        Document commentOne = new Document();
        commentOne.Add(new TextField("BlogId", "1", Field.Store.YES));
        commentOne.Add(new TextField("CommentContent", "blog", Field.Store.YES));
        commentOne.Add(new TextField("Type", "comment", Field.Store.YES));
        commentOne.Add(new TextField("Note", "child", Field.Store.YES));
        one.Add(commentOne);

        var blogOne = new Document();
        blogOne.Add(new TextField("Id", "1", Field.Store.YES));
        blogOne.Add(new TextField("BlogContent", "Content of first blog!", Field.Store.YES));
        blogOne.Add(new TextField("Type", "blog", Field.Store.NO));
        blogOne.Add(new TextField("Note", "parent", Field.Store.YES));
        one.Add(blogOne);

        // Block 2: same layout, child first and parent last.
        var two = new List<Document>();

        var commentTwo = new Document();
        commentTwo.Add(new TextField("BlogId", "2", Field.Store.YES));
        commentTwo.Add(new TextField("CommentContent", "Not that great.", Field.Store.YES));
        commentTwo.Add(new TextField("Type", "comment", Field.Store.YES));
        commentTwo.Add(new TextField("Note", "child", Field.Store.YES));
        two.Add(commentTwo);

        Document blogTwo = new Document();
        blogTwo.Add(new TextField("Id", "2", Field.Store.YES));
        blogTwo.Add(new TextField("BlogContent", "This is the second blog!", Field.Store.YES));
        blogTwo.Add(new TextField("Type", "blog", Field.Store.NO));
        blogTwo.Add(new TextField("Note", "parent", Field.Store.YES));
        two.Add(blogTwo);

        // Each list is handed to the writer in a single AddDocuments call so that
        // its documents are indexed together as one block.
        indexWriterBlog.AddDocuments(one);
        indexWriterBlog.AddDocuments(two);
        indexWriterBlog.Commit();
    }

    using (var reader = DirectoryReader.Open(indexDirectoryBlog))
    {
        var searcher = new IndexSearcher(reader);

        // Parent filter: documents whose Type field contains the term "blog".
        // The FixedBitSetCachingWrapperFilter wrapper is needed because the block
        // join rejects a bare QueryWrapperFilter with
        // "parentFilter must return FixedBitSet".
        Filter parentQuery =
            new FixedBitSetCachingWrapperFilter(
                new QueryWrapperFilter(
                    new TermQuery(
                        new Term("Type", "blog"))));

        // Child query. TermQuery takes its term verbatim, so it is given a single
        // word here rather than a whole sentence.
        BooleanQuery childQuery = new BooleanQuery();
        childQuery.Add(new TermQuery(new Term("CommentContent", "blog")), Occur.MUST);

        var commentJoinQuery = new ToParentBlockJoinQuery(
            childQuery,
            parentQuery,
            ScoreMode.None);

        var c = new ToParentBlockJoinCollector(
            Sort.RELEVANCE, // sort
            10, // numHits
            false, // trackScores
            false // trackMaxScore
            );
        searcher.Search(commentJoinQuery, c);

        int maxDocsPerGroup = 10;
        var hits = c.GetTopGroups(
            commentJoinQuery,
            Sort.INDEXORDER,
            0, // offset
            maxDocsPerGroup, // maxDocsPerGroup
            0, // withinGroupOffset
            true // fillSortFields
            );

        if (hits != null)
        {
            Console.WriteLine("Found " + hits.TotalGroupCount + " groups:");
            // Iterate over the groups actually returned, not over the total count.
            for (int i = 0; i < hits.Groups.Length; i++)
            {
                var group = hits.Groups[i];
                Console.WriteLine("Group " + i + ": " + group.ToString());
                // ScoreDocs holds the child documents returned for this group.
                for (int j = 0; j < group.ScoreDocs.Length; j++)
                {
                    Document doc = searcher.Doc(group.ScoreDocs[j].Doc);
                    Console.WriteLine("Hit " + j + ": " + doc.ToString());
                }
            }
        }
        else
        {
            Console.WriteLine("No hits.");
        }
    }
}
Console.WriteLine("Done.");
Console.ReadKey();
请注意,我是在 .NET Core 控制台应用程序中使用以下包的:
<PackageReference Include="Lucene.Net" Version="4.8.0-beta00005" />
<PackageReference Include="Lucene.Net.Analysis.Common" Version="4.8.0-beta00005" />
<PackageReference Include="Lucene.Net.Join" Version="4.8.0-beta00005" />