Lucene查询在标准分析器上没有返回命中
本文关键字:返回 分析器 查询 标准 标准分 Lucene | 更新日期: 2023-09-27 18:13:28
我有一个名为 thatfeelwhen.pdf 的文件。当我搜索 "that" 或 "feel" 这样的词时没有任何命中,而输入 "when" 或完整的文件名时却可以命中。我使用的是标准分析器(StandardAnalyzer)。怎样才能让 Lucene 的搜索匹配到所有这些情况?我的查询似乎能匹配文件内部的内容,却无法匹配文件名。
public partial class _Default : Page
{
Directory finalDirectory = null;
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
其他方法中的代码:
/// <summary>
/// Builds one Lucene document for a PDF and adds it to the index.
/// </summary>
/// <param name="filename">Value stored and analyzed in the "fileName" field.</param>
/// <param name="pdfBody">Text analyzed, but not stored, in the "pdfBody" field.</param>
/// <param name="writer">Index writer that receives the new document.</param>
/// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
private static void AddTextToIndex(string filename, string pdfBody, IndexWriter writer)
{
    // Reject nulls up front so the failure names the offending argument.
    if (filename == null)
    {
        throw new ArgumentNullException("filename");
    }

    if (pdfBody == null)
    {
        throw new ArgumentNullException("pdfBody");
    }

    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    Document doc = new Document();

    // The file name is stored so search results can read it back with Get("fileName").
    doc.Add(new Field("fileName", filename, Field.Store.YES, Field.Index.ANALYZED));

    // The body is indexed for searching only; Store.NO keeps its text out of the stored fields.
    doc.Add(new Field("pdfBody", pdfBody, Field.Store.NO, Field.Index.ANALYZED));

    writer.AddDocument(doc);
}
/// <summary>
/// Builds the Lucene index from the files found directly inside the forms folder,
/// adding one document per file via <c>AddTextToIndex</c>.
/// </summary>
/// <param name="analyzer">Analyzer handed to the index writer.</param>
/// <returns>The directory object the index was written to.</returns>
private static Directory buildIndex(Analyzer analyzer)
{
    // NOTE(review): both path literals contain apostrophes where path separators
    // would be expected; they are kept byte-for-byte here - confirm the real paths.
    string[] syllabusFiles = System.IO.Directory.GetFiles(@"C:'mywebsite'files'forms");
    Directory directory = FSDirectory.Open(new DirectoryInfo(@"C:'mywebsite'files'LuceneIndex"));

    // 'using' guarantees the writer is disposed (releasing the index write lock)
    // even when text extraction or indexing throws part-way through.
    using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED))
    {
        foreach (string syllabusFile in syllabusFiles)
        {
            string pdfTextExtracted = pdfText(syllabusFile);

            // Take the final path segment rather than stripping a hard-coded prefix,
            // so the indexed name does not depend on the folder's exact spelling.
            string fileNameOnly = System.IO.Path.GetFileName(syllabusFile);

            AddTextToIndex(fileNameOnly, pdfTextExtracted, writer);
        }

        writer.Optimize();
    }

    return directory;
}
protected void txtBoxSearchPDF_Click(object sender, EventArgs e)
{
if (txtBoxSearchString.Text == "")
{
lblNoSearchString.Visible = true;
}
else if (txtBoxSearchString.Text == "build_index")
{
this.finalDirectory = buildIndex(this.analyzer);
}
else
{
//searching PDF text
lblNoSearchString.Visible = false;
StringBuilder sb = new StringBuilder();
this.finalDirectory = FSDirectory.Open(new DirectoryInfo(@"C:'mywebsite'files'LuceneIndex"));
IndexReader indexReader = IndexReader.Open(this.finalDirectory, true);
Searcher indexSearch = new IndexSearcher(indexReader);
string searchQuery = txtBoxSearchString.Text;
var fields = new[] { "fileName", "pdfBody" };
var queryParser = new MultiFieldQueryParser(Version.LUCENE_30, fields, this.analyzer);
Query query;
try
{
query = queryParser.Parse(searchQuery.Trim());
}
catch (ParseException)
{
query = queryParser.Parse(QueryParser.Escape(searchQuery.Trim()));
}
TopDocs resultDocs = indexSearch.Search(query, indexReader.MaxDoc);
var hits = resultDocs.ScoreDocs;
foreach (var hit in hits)
{
var documentFromSearcher = indexSearch.Doc(hit.Doc);
string getResult = documentFromSearcher.Get("fileName");
string formattedResult = getResult.Replace(" ", "%20");
sb.AppendLine(@"<a href=https://website.com/search/forms/" + formattedResult+ ">" + getResult+"</a>");
sb.AppendLine("<br>");
}
我最终改用 Analyzer analyzer = new SingleCharTokenAnalyzer();,得到了更好的结果。
我也尝试过 Simple、Standard、Whitespace 和 Keyword 这几种分析器,但如果不额外花功夫去定制,它们都无法真正满足我的需求。