如何访问Stanford Parser返回的依赖树和选区树中的各个节点

本文关键字:依赖 选区 节点 返回 何访问 访问 Parser Stanford | 更新日期: 2023-09-27 17:59:27

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using java.io;
using edu.stanford.nlp.process;
using edu.stanford.nlp.ling;
using edu.stanford.nlp.trees;
using edu.stanford.nlp.parser.lexparser;
using Console = System.Console;
namespace Parser
{
    /// <summary>
    /// Console sample that parses a single sentence with the Stanford lexicalized parser and
    /// prints both its constituency tree and its typed dependencies.
    /// </summary>
    class Parser
    {
        /// <summary>
        /// Loads the English PCFG lexicalized parser from the hard-coded models directory.
        /// </summary>
        /// <returns>The parser returned by <c>LexicalizedParser.loadModel</c>.</returns>
        private static LexicalizedParser LoadLexicalizedParser()
        {
            // Path to models extracted from `stanford-parser-3.5.2-models.jar`.
            // Verbatim strings are used so the Windows backslash separators stay literal.
            var jarRoot = @"E:\Project\stanford-parser-full-2015-04-20\stanford-parser-3.5.2-models";
            var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

            // Loading english PCFG parser from file
            return LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");
        }

        /// <summary>
        /// Parses <paramref name="sentence"/> and returns the resulting tree.
        /// </summary>
        /// <param name="lp">Parser to apply to the tokenized sentence.</param>
        /// <param name="sentence">Sentence whose words are separated by spaces.</param>
        /// <returns>The tree produced by <c>lp.apply</c> for the word list.</returns>
        private static Tree GetLexicalTree(LexicalizedParser lp, string sentence)
        {
            // Drop empty entries so that repeated spaces do not reach the parser as empty words.
            string[] words = sentence.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            // This sample shows parsing a list of correctly tokenized words
            var rawWords = Sentence.toCoreLabelList(words);
            return lp.apply(rawWords);
        }

        /// <summary>
        /// Renders the constituency structure of <paramref name="tree"/> as text.
        /// </summary>
        /// <param name="tree">Tree to render.</param>
        /// <returns>The value of <c>tree.pennString()</c>.</returns>
        private static string GetConstituencyTree(Tree tree)
        {
            return tree.pennString();
        }

        /// <summary>
        /// Renders the typed dependencies derived from <paramref name="tree"/> as text,
        /// one dependency per line.
        /// </summary>
        /// <param name="tree">Tree from which the dependencies are extracted.</param>
        /// <returns>Each dependency's string form followed by a newline; empty when there are none.</returns>
        private static string GetDependencyTree(Tree tree)
        {
            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs = gsf.newGrammaticalStructure(tree);
            var tdl = gs.typedDependenciesCCprocessed();

            // StringBuilder avoids re-allocating the whole result on every iteration.
            var dependencyTree = new StringBuilder();
            for (int i = 0; i < tdl.size(); ++i)
            {
                dependencyTree.Append(tdl.get(i)).Append("\n");
            }

            return dependencyTree.ToString();
        }

        /// <summary>
        /// Entry point: loads the parser, parses a fixed sample sentence and prints both trees.
        /// </summary>
        static void Main()
        {
            var lp = LoadLexicalizedParser();
            string sentence = "This is an easy sentence.";
            Tree tree = GetLexicalTree(lp, sentence);

            string constituencyTree = GetConstituencyTree(tree);
            string dependencyTree = GetDependencyTree(tree);

            Console.WriteLine("Constituency Tree\n" + constituencyTree);
            Console.WriteLine("Dependency Tree\n" + dependencyTree);

            //// Extract collapsed dependencies from parsed tree
            //var tp = new TreePrint("penn,typedDependenciesCollapsed");
            //tp.printTree(tree);
        }
    }
}

在这段代码中,我把选区树和依赖树都转换成了字符串。但我希望直接以 Tree 类型本身来使用它们,也就是说,我想访问和操作变量 tree 中的各个节点。有什么办法可以做到吗?还是说,我必须创建自己的树数据结构,并通过解析字符串("constituencyTree" 和 "dependencyTree")来获得各个节点?

[我目前正在做的一个小项目需要这个。]

如何访问Stanford Parser返回的依赖树和选区树中的各个节点

是的,有大量的数据结构可用于选区树和依赖树。对于选区树,您希望使用Tree数据结构,该数据结构具有许多有用的内置功能,用于遍历树、获取所有终端节点等。

对于依赖树,您可以使用 TypedDependency 对象列表,其中每个 TypedDependency 表示支配词(governor)和依存词(dependent)之间的关系;也可以使用 SemanticGraph。要将示例中名为 tdl 的 TypedDependency 列表转换为 SemanticGraph,只需将该列表传递给构造函数:

SemanticGraph sg = new SemanticGraph(tdl);