ASP.NET MVC中的服务器端HTML过滤库

本文关键字：asp net mvc html 过滤服务器端 | 更新日期: 2023-09-27 18:02:08

我用的是TinyMCE。我需要服务器端的HTML输入过滤功能或库。TinyMCE推荐了以下几个：

htmlLawed
HTMLPurifier
Zend Filter Input

所有这些只与PHP兼容。但我用的是asp.net MVC

请告诉我，在asp.net MVC中是否有可用的过滤library或attribute

例如

// Illustrative sketch of the usage the question is asking for; the
// HtmlInputFilter attribute is hypothetical and is not defined here.
[HtmlInputFilter] // <-- the kind of filtering attribute being requested
public ActionResult Post(string html){
...
}

ASP.NET MVC中的服务器端HTML过滤库

您可以使用Microsoft AntiXSS库来清理HTML。

如果你想使用自己的标签和属性白名单，AntiXSS库并不合适，因为它没有提供必要的自定义钩子。一种做法是自己编写HTML清理器（sanitizer）（当然风险自负）。例如，你可以使用HTML Agility Pack。下面这篇博客文章（blog post）给出了一个清理器示例:

/// <summary>
/// Whitelist-based HTML sanitizing helpers built on HtmlAgilityPack.
/// Elements whose tag name is not whitelisted are removed; on the remaining
/// elements only whitelisted attributes are kept.
/// </summary>
public static class HtmlUtility
{
    // Original list courtesy of Robert Beal :
    // http://www.robertbeal.com/37/sanitising-html
    // Key: allowed tag name. Value: attribute names allowed on that tag.
    private static readonly Dictionary<string, string[]> ValidHtmlTags =
        new Dictionary<string, string[]>
        {
            {"p", new string[]          {"style", "class", "align"}},
            {"div", new string[]        {"style", "class", "align"}},
            {"span", new string[]       {"style", "class"}},
            {"br", new string[]         {"style", "class"}},
            {"hr", new string[]         {"style", "class"}},
            {"label", new string[]      {"style", "class"}},
            {"h1", new string[]         {"style", "class"}},
            {"h2", new string[]         {"style", "class"}},
            {"h3", new string[]         {"style", "class"}},
            {"h4", new string[]         {"style", "class"}},
            {"h5", new string[]         {"style", "class"}},
            {"h6", new string[]         {"style", "class"}},
            {"font", new string[]       {"style", "class", "color", "face", "size"}},
            {"strong", new string[]     {"style", "class"}},
            {"b", new string[]          {"style", "class"}},
            {"em", new string[]         {"style", "class"}},
            {"i", new string[]          {"style", "class"}},
            {"u", new string[]          {"style", "class"}},
            {"strike", new string[]     {"style", "class"}},
            {"ol", new string[]         {"style", "class"}},
            {"ul", new string[]         {"style", "class"}},
            {"li", new string[]         {"style", "class"}},
            {"blockquote", new string[] {"style", "class"}},
            {"code", new string[]       {"style", "class"}},
            {"a", new string[]          {"style", "class", "href", "title"}},
            {"img", new string[]        {"style", "class", "src", "height", "width",
                "alt", "title", "hspace", "vspace", "border"}},
            {"table", new string[]      {"style", "class"}},
            {"thead", new string[]      {"style", "class"}},
            {"tbody", new string[]      {"style", "class"}},
            {"tfoot", new string[]      {"style", "class"}},
            {"th", new string[]         {"style", "class", "scope"}},
            {"tr", new string[]         {"style", "class"}},
            {"td", new string[]         {"style", "class", "colspan"}},
            {"q", new string[]          {"style", "class", "cite"}},
            {"cite", new string[]       {"style", "class"}},
            {"abbr", new string[]       {"style", "class"}},
            {"acronym", new string[]    {"style", "class"}},
            {"del", new string[]        {"style", "class"}},
            {"ins", new string[]        {"style", "class"}}
        };

    /// <summary>
    /// Takes raw HTML input and cleans it against the tag/attribute whitelist.
    /// </summary>
    /// <param name="source">Html source; may be null or empty.</param>
    /// <returns>
    /// The cleaned markup, or an empty string when <paramref name="source"/>
    /// is null or empty.
    /// </returns>
    public static string SanitizeHtml(string source)
    {
        // HtmlDocument.LoadHtml throws on null, so treat missing input as
        // "nothing to clean" instead of letting the exception escape.
        if (String.IsNullOrEmpty(source)) return String.Empty;

        HtmlDocument html = GetHtml(source);
        HtmlNode root = html.DocumentNode;

        // Scrub tags not in whitelist
        CleanNodes(root);

        // Filter the attributes of the remaining elements in a single walk;
        // the dictionary lookup replaces one full tree scan per whitelisted tag.
        foreach (HtmlNode node in root.DescendantsAndSelf())
        {
            string[] allowed;
            if (!node.HasAttributes) continue;
            if (!ValidHtmlTags.TryGetValue(node.Name, out allowed)) continue;

            // Snapshot the attributes: entries are removed while we iterate.
            HtmlAttribute[] attributes = node.Attributes.ToArray();
            foreach (HtmlAttribute attribute in attributes)
            {
                if (!allowed.Contains(attribute.Name))
                {
                    attribute.Remove(); // Wasn't in the list
                }
                else
                {
                    // AntiXss
                    // NOTE(review): URL-path encoding is applied to every kept
                    // attribute, including non-URL ones such as "style" and
                    // "class" - confirm this is the intended encoding.
                    attribute.Value =
                        Microsoft.Security.Application.Encoder.UrlPathEncode(attribute.Value);
                }
            }
        }

        return root.InnerHtml;
    }

    /// <summary>
    /// Takes a raw source, sanitizes it, and returns only the text content
    /// of what remains.
    /// </summary>
    /// <param name="source">Html source; may be null or empty.</param>
    /// <returns>
    /// The concatenated inner text of the sanitized markup, or an empty
    /// string when sanitizing leaves nothing.
    /// </returns>
    public static string StripHtml(string source)
    {
        source = SanitizeHtml(source);

        // No need to continue if we have no clean Html
        if (String.IsNullOrEmpty(source))
            return String.Empty;

        HtmlDocument html = GetHtml(source);
        StringBuilder result = new StringBuilder();

        // For each top-level node, extract only the innerText
        foreach (HtmlNode node in html.DocumentNode.ChildNodes)
            result.Append(node.InnerText);

        return result.ToString();
    }

    /// <summary>
    /// Recursively delete element nodes whose tag name is not in the whitelist.
    /// Only nodes of type Element are tested; other node types are kept and
    /// their children are still visited.
    /// </summary>
    private static void CleanNodes(HtmlNode node)
    {
        if (node.NodeType == HtmlNodeType.Element && !ValidHtmlTags.ContainsKey(node.Name))
        {
            // The element is dropped as a whole, so its children are not visited.
            node.ParentNode.RemoveChild(node);
            return; // We're done
        }

        if (node.HasChildNodes)
            CleanChildren(node);
    }

    /// <summary>
    /// Apply CleanNodes to each of the child nodes.
    /// </summary>
    private static void CleanChildren(HtmlNode parent)
    {
        // Walk backwards so that removing a child does not shift the
        // indexes of the children still to be visited.
        for (int i = parent.ChildNodes.Count - 1; i >= 0; i--)
            CleanNodes(parent.ChildNodes[i]);
    }

    /// <summary>
    /// Helper function that parses text into a new HTML document with the
    /// nested-tag fixing and auto-close options enabled.
    /// </summary>
    private static HtmlDocument GetHtml(string source)
    {
        HtmlDocument html = new HtmlDocument();
        html.OptionFixNestedTags = true;
        html.OptionAutoCloseOnEnd = true;
        html.OptionDefaultStreamEncoding = Encoding.UTF8;
        html.LoadHtml(source);
        return html;
    }
}