Lucene查询在标准分析器上没有返回命中

本文关键字:返回 分析器 查询 标准 标准分 Lucene | 更新日期: 2023-09-27 18:13:28

我有一个名为 thatfeelwhen.pdf 的文件。当我搜索 "that" 或 "feel" 这样的词时,得不到任何命中;而当我输入 "when" 或完整的文件名时,却可以命中。我使用的是标准分析器(StandardAnalyzer)。怎样才能让 Lucene 搜索在这些情况下都能匹配?我的搜索查询似乎能匹配文件内部的内容,但无法匹配文件名。

// Code-behind class for the search page. Only the field declarations are
// visible in this snippet; the rest of the class body is shown further below.
public partial class _Default : Page
{
    // Lucene index directory. Starts out null and is assigned by the search
    // button click handler (either by rebuilding or by opening the index).
    Directory finalDirectory = null;
    // Analyzer instance passed both to index building and to query parsing.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

其他方法中的代码:

/// <summary>
/// Adds one document to the index with two analyzed fields: "fileName"
/// (stored, so it can be read back from search hits) and "pdfBody"
/// (indexed only, not stored).
/// </summary>
/// <param name="filename">Value indexed and stored in the "fileName" field.</param>
/// <param name="pdfBody">Text indexed in the "pdfBody" field.</param>
/// <param name="writer">Open index writer that receives the document.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
private static void AddTextToIndex(string filename, string pdfBody, IndexWriter writer)
    {
        // Fail with a named argument instead of an anonymous NullReferenceException.
        if (filename == null)
        {
            throw new ArgumentNullException("filename");
        }
        if (pdfBody == null)
        {
            throw new ArgumentNullException("pdfBody");
        }
        if (writer == null)
        {
            throw new ArgumentNullException("writer");
        }

        Document doc = new Document();
        // Both values are already strings, so no ToString() conversion is needed.
        doc.Add(new Field("fileName", filename, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("pdfBody", pdfBody, Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    /// <summary>
    /// Rebuilds the Lucene index: enumerates the files in the forms folder,
    /// extracts each file's text via <c>pdfText</c>, and indexes it together
    /// with the bare file name.
    /// </summary>
    /// <param name="analyzer">Analyzer used by the index writer.</param>
    /// <returns>The directory holding the newly written index.</returns>
    private static Directory buildIndex(Analyzer analyzer)
    {
        // NOTE(review): the apostrophes in these path literals look like
        // backslashes mangled during copy/paste — confirm against the real paths.
        string[] syllabusFiles = System.IO.Directory.GetFiles(@"C:'mywebsite'files'forms");
        Directory directory = FSDirectory.Open(new DirectoryInfo(@"C:'mywebsite'files'LuceneIndex"));

        // 'true' asks the writer to create a new index. The using block
        // guarantees the writer is disposed even when text extraction or
        // indexing throws part-way through.
        using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED))
        {
            foreach (string syllabusFile in syllabusFiles)
            {
                string pdfTextExtracted = pdfText(syllabusFile);
                // Take the file name from the path itself rather than stripping a
                // hard-coded prefix: the old prefix did not match the folder that
                // is enumerated above, so the full path ended up in "fileName".
                string fileNameOnly = System.IO.Path.GetFileName(syllabusFile);
                AddTextToIndex(fileNameOnly, pdfTextExtracted, writer);
            }
            writer.Optimize();
        }
        return directory;
    }
    // Click handler for the search button. Depending on the text box content it
    // shows the "no search string" label, rebuilds the index (magic value
    // "build_index"), or runs a Lucene query over the "fileName" and "pdfBody"
    // fields and appends one HTML link per hit to a StringBuilder.
    // NOTE: the method is truncated in this snippet; its closing braces and
    // whatever consumes the StringBuilder are not visible here.
    protected void txtBoxSearchPDF_Click(object sender, EventArgs e)
    {
        if (txtBoxSearchString.Text == "")
        {
            // Nothing to search for: show the label.
            lblNoSearchString.Visible = true;               
        }
        else if (txtBoxSearchString.Text == "build_index")
        {
            // Special input that triggers an index rebuild instead of a search.
            this.finalDirectory = buildIndex(this.analyzer);
        }
        else
        {
            //searching PDF text
            lblNoSearchString.Visible = false;
            StringBuilder sb = new StringBuilder();
            this.finalDirectory = FSDirectory.Open(new DirectoryInfo(@"C:'mywebsite'files'LuceneIndex"));
            // Second argument is IndexReader.Open's readOnly flag.
            // NOTE(review): no Dispose/Close of the reader or searcher is visible
            // in this snippet — confirm they are released in the truncated part.
            IndexReader indexReader = IndexReader.Open(this.finalDirectory, true);
            Searcher indexSearch = new IndexSearcher(indexReader);
            string searchQuery = txtBoxSearchString.Text;
            // Search both fields, using the same analyzer instance that is
            // passed to buildIndex.
            var fields = new[] { "fileName", "pdfBody" };
            var queryParser = new MultiFieldQueryParser(Version.LUCENE_30, fields, this.analyzer);
            Query query;
            try
            {
                query = queryParser.Parse(searchQuery.Trim());
            }
            catch (ParseException)
            {
                // The raw input was not valid query syntax: retry with the
                // query-parser special characters escaped.
                query = queryParser.Parse(QueryParser.Escape(searchQuery.Trim()));
            }
            // Request up to MaxDoc results, i.e. do not cap the hit list.
            TopDocs resultDocs = indexSearch.Search(query, indexReader.MaxDoc);                
            var hits = resultDocs.ScoreDocs;
            foreach (var hit in hits)
            {
                var documentFromSearcher = indexSearch.Doc(hit.Doc);
                // "fileName" is the stored field, so it can be read back here.
                string getResult = documentFromSearcher.Get("fileName");
                // Only spaces are percent-encoded for the link target.
                string formattedResult = getResult.Replace(" ", "%20");
                // NOTE(review): the file name is written into the markup without
                // HTML/URL encoding and the href is unquoted — verify this is safe
                // for the file names that can appear in the index.
                sb.AppendLine(@"<a href=https://website.com/search/forms/" + formattedResult+ ">" + getResult+"</a>");
                sb.AppendLine("<br>");
            }

Lucene查询在标准分析器上没有返回命中

我最终选择使用 `Analyzer analyzer = new SingleCharTokenAnalyzer();`,并得到了更好的结果。

我尝试过 Simple、Standard、Whitespace 和 Keyword 这几种分析器,但如果不额外花功夫去定制,它们没有一个能真正满足我的需求。