如何获知Word文档中特定文本的格式属性(例如字体大小)?
本文关键字:字体 例如 文本 word 何知道 文档 | 更新日期: 2023-09-27 17:50:39
这个想法很简单,但答案可能会很复杂:
事实上,我可以检查运行属性的字体大小
如果没有,我需要检查应用于段落的样式,以便找到为字体大小定义的运行属性,然后该样式的段落运行属性。
如果没有找到,我需要再次检查关于此样式所基于的样式的所有内容。
如果没有找到,我应该在样式层次结构中检查下面的样式,并继续下去,直到找到默认样式。
我还需要检查前一段是否有应用到它的样式。在这种情况下,应用的样式可以定义下一段的样式,影响我正在处理的文本。
如果没有样式影响我的段落,那么我需要从样式部分查看默认运行属性。之后,我应该查看同一部分的默认段落属性。
如果没有应用,则定义大小的责任交给处理文档的应用程序。
我说的对吗?
OpenXML SDK和/或OpenXmlPowerTools在这方面能为我提供帮助吗?
一个重要的方面是,这个问题几乎延伸到任何段落或运行属性,除了文本字体大小。
我的最终目标是根据格式判断一段文本是否是章节标题(如heading1, heading2等),但看起来连"一段文本的当前格式"这样简单的信息都很难获得。更麻烦的是,我还需要处理(章节)编号,而很多时候段落上并没有应用编号格式。
谢谢,
所以,我在回答我自己的问题。
我开发了一个方法,它返回word文档段落中特定运行(run)的"有效"运行属性。它按照ISO/IEC 29500-1标准,综合考虑了文档默认属性、应用的样式(包括相关的样式层次结构)以及直接设置在运行上的属性。
有趣的是,Word似乎在以下两个方面没有完全遵循标准:1 - 如果一个段落没有应用样式,Word会应用默认的段落样式。按照我对标准的理解,这种情况下不应该应用任何样式。运行则不会这样:当运行没有指定运行样式时,不会应用默认的运行样式。2 - 为了获得有效的运行属性,需要对样式进行"汇总"(roll up)。段落样式和运行样式都遵循样式层次结构:要获得某个属性的值,先在所应用的样式中查找,如果不存在,则在父样式中查找,依此类推。如果某个属性已在某个样式中以特定值定义,而子样式中的值与之相同,则不应在子样式中重复定义该属性。Word在字符样式上不遵循这一规则:实际上,运行样式所应用的全部运行属性都可以直接从该运行样式本身获得,而无需沿样式层次结构查找。这不符合标准。
现在,让我详细介绍一下我的解决方案:
首先,我的代码使用了Open XML PowerTools:http://powertools.codeplex.com/
接下来,关于样式继承,我采用并实现了Eric White提供的解决方案:http://blogs.msdn.com/b/ericwhite/archive/2009/12/13/implementing-inheritance-in-xml.aspx和http://blogs.msdn.com/b/ericwhite/archive/2009/10/29/open-xml-wordprocessingml-style-inheritance.aspx
获得运行属性的完整算法可以在标准中找到,它也由Eric White提供:http://blogs.msdn.com/b/ericwhite/archive/2009/11/12/assembling-paragraph-and-run-properties-for-cells-in-a-table.aspx在这种情况下,它涉及从表中的单元格中提取属性。我的方法不适用于表内的段落(我只是不需要它:-)),但它可以扩展到处理这些情况(所有信息都在Eric的文章中)
请注意,我正确地处理了toggle属性和word的实际工作方式(我所指出的与标准相关的差异)。
最后,代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Wordprocessing;
using OpenXmlPowerTools;
namespace MyNameSpace
{
class OpenXmlPowerToolsUtilities
{
/// <summary>
/// Builds the effective run properties (<c>w:rPr</c>) of a run by layering, in order:
/// the document default run properties, the run properties of the paragraph style
/// (or of the default paragraph style when the paragraph names none), the run
/// properties of the run style, the resolved toggle properties, and finally the
/// run's own direct properties. Later layers override earlier ones.
/// </summary>
/// <param name="wordDoc">The document whose style definitions part is consulted.</param>
/// <param name="run">The run element; its parent must be a <c>w:p</c> element.</param>
/// <returns>
/// A new merged <c>w:rPr</c> element, or <c>null</c> when the run is not a direct child
/// of a paragraph or the document has no main part / style definitions part.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="wordDoc"/> or <paramref name="run"/> is <c>null</c>.
/// </exception>
public static XElement GetEffectiveRunProperties(WordprocessingDocument wordDoc, XElement run)
{
    if (wordDoc == null)
        throw new ArgumentNullException("wordDoc");
    if (run == null)
        throw new ArgumentNullException("run");

    // A detached run has no parent at all; treat it like any other unsupported container.
    XElement paragraph = run.Parent;
    if (paragraph == null || paragraph.Name != W.p)
        return null;

    MainDocumentPart mainDocumentPart = wordDoc.MainDocumentPart;
    if (mainDocumentPart == null)
        return null;
    StyleDefinitionsPart styleDefinitionsPart = mainDocumentPart.StyleDefinitionsPart;
    if (styleDefinitionsPart == null)
        return null;
    XElement styles = styleDefinitionsPart.GetXDocument().Root;

    List<XElement> runPropertiesList = new List<XElement>();

    // 1 - Get run default
    XElement runDefault = styles.Elements(W.docDefaults)
        .Elements(W.rPrDefault)
        .Elements(W.rPr)
        .FirstOrDefault();
    if (runDefault != null)
        runPropertiesList.Add(runDefault);

    // 2 - get paragraph style run properties
    XElement pStyleRunProperties = null;
    string pStyle = (string)paragraph.Elements(W.pPr)
        .Elements(W.pStyle)
        .Attributes(W.val)
        .FirstOrDefault();
    if (pStyle != null)
    {
        pStyleRunProperties = AssembleStyleInformation(styles, pStyle)
            .Elements(W.rPr)
            .FirstOrDefault();
    }
    else
    {
        // No explicit paragraph style: fall back to the style flagged as the default
        // paragraph style. A styles part is not guaranteed to declare one.
        XElement defaultParagraphStyle = styles
            .Elements(W.style)
            .Where(e =>
                (string)e.Attribute(W.type) == "paragraph" &&
                (string)e.Attribute(W._default) == "1")
            .FirstOrDefault();
        if (defaultParagraphStyle != null)
            pStyleRunProperties = defaultParagraphStyle.Elements(W.rPr).FirstOrDefault();
    }
    if (pStyleRunProperties != null)
        runPropertiesList.Add(pStyleRunProperties);

    // 3 - get run style run properties
    string rStyle = (string)run.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault();
    XElement rStyleRunProperties = null;
    if (rStyle != null)
    {
        rStyleRunProperties = AssembleStyleInformation(styles, rStyle)
            .Elements(W.rPr)
            .FirstOrDefault();
    }
    if (rStyleRunProperties != null)
        runPropertiesList.Add(rStyleRunProperties);

    // Toggle properties are resolved across the three style-level layers before the
    // direct properties are applied. When the document declares no default run
    // properties, an empty rPr stands in so the toggle resolver always has a base.
    XElement toggleProperties = AssembleToggleProperties(
        runDefault ?? new XElement(W.rPr),
        pStyleRunProperties,
        rStyleRunProperties);
    if (toggleProperties != null)
        runPropertiesList.Add(toggleProperties);

    // 4 - direct run properties
    XElement directRunProperties = run.Elements(W.rPr).FirstOrDefault();
    if (directRunProperties != null)
        runPropertiesList.Add(directRunProperties);

    return AssembleRunProperties(runPropertiesList);
}
/// <summary>
/// Folds a list of run-property layers into a single <c>w:rPr</c> element.
/// Layers are merged in list order via <see cref="MergeChildElements"/>, starting
/// from an empty <c>w:rPr</c> that only carries the <c>w</c> namespace declaration.
/// </summary>
/// <param name="runPropertiesList">The layers to merge, lowest priority first.</param>
/// <returns>The merged <c>w:rPr</c> element.</returns>
private static XElement AssembleRunProperties(List<XElement> runPropertiesList)
{
    XElement emptyRunProperties = new XElement(
        W.rPr,
        new XAttribute(XNamespace.Xmlns + "w", W.w));

    return runPropertiesList.Aggregate(emptyRunProperties, MergeChildElements);
}
/// <summary>
/// Resolves every toggle property listed in <c>toggleProperties</c> across the
/// document defaults, the paragraph style and the run style, and returns the
/// results in a new <c>w:rPr</c> element.
/// </summary>
/// <remarks>
/// For each toggle property:
/// - if the document default sets it on, that element is used and the styles are ignored;
/// - if only one of the two styles defines it, that definition is used;
/// - if both styles define it and one of them is off, the other one is used;
/// - if both styles set it on, the two cancel out and an explicit off (<c>val="0"</c>) is emitted;
/// - if nobody defines it, nothing is emitted.
/// </remarks>
/// <param name="runDefault">Document default run properties; may be <c>null</c>.</param>
/// <param name="pStyleRunProperties">Run properties of the paragraph style; may be <c>null</c>.</param>
/// <param name="rStyleRunProperties">Run properties of the run style; may be <c>null</c>.</param>
/// <returns>A new <c>w:rPr</c> element holding the resolved toggle properties (possibly empty).</returns>
static XElement AssembleToggleProperties(XElement runDefault, XElement pStyleRunProperties, XElement rStyleRunProperties)
{
    XElement runToggleProperties = new XElement(W.rPr,
        new XAttribute(XNamespace.Xmlns + "w", W.w));

    foreach (XName toggleProperty in toggleProperties)
    {
        // The document may not declare default run properties at all.
        XElement runDefaultToggleProperty = runDefault == null
            ? null
            : runDefault.Elements(toggleProperty).FirstOrDefault();
        if (runDefaultToggleProperty != null && IsToggleOn(runDefaultToggleProperty))
        {
            runToggleProperties.Add(runDefaultToggleProperty);
            continue;
        }

        XElement pStyleToggleProperty = pStyleRunProperties == null
            ? null
            : pStyleRunProperties.Elements(toggleProperty).FirstOrDefault();
        XElement rStyleToggleProperty = rStyleRunProperties == null
            ? null
            : rStyleRunProperties.Elements(toggleProperty).FirstOrDefault();

        if (pStyleToggleProperty == null && rStyleToggleProperty == null)
            continue;

        if (pStyleToggleProperty == null)
            runToggleProperties.Add(rStyleToggleProperty);
        else if (rStyleToggleProperty == null)
            runToggleProperties.Add(pStyleToggleProperty);
        else if (!IsToggleOn(rStyleToggleProperty))
            runToggleProperties.Add(pStyleToggleProperty);
        else if (!IsToggleOn(pStyleToggleProperty))
            runToggleProperties.Add(rStyleToggleProperty);
        else
            // Both style levels turn the toggle on, so they cancel each other out.
            runToggleProperties.Add(new XElement(toggleProperty, new XAttribute(W.val, "0")));
    }
    return runToggleProperties;
}

/// <summary>
/// Tells whether a toggle element is switched on. A missing <c>w:val</c> attribute
/// means on; the values "0", "false" and "off" mean off.
/// </summary>
private static bool IsToggleOn(XElement toggleElement)
{
    string val = (string)toggleElement.Attribute(W.val);
    return !(val == "0" || val == "false" || val == "off");
}
/// <summary>
/// Walks the <c>w:basedOn</c> chain of a style, starting at the style with the given
/// id and moving towards its ancestors (most derived style first).
/// </summary>
/// <remarks>
/// The walk ends when a style has no <c>w:basedOn</c>, when a referenced style id is
/// not present in <paramref name="styles"/>, or when a style id repeats (which would
/// otherwise make a cyclic chain loop forever). A <c>null</c> style id yields an
/// empty sequence.
/// </remarks>
/// <param name="styles">The root element of the styles part.</param>
/// <param name="styleId">The id of the style to start from.</param>
/// <returns>The style elements of the chain, most derived first. Lazily evaluated.</returns>
public static IEnumerable<XElement> StyleChainReverseOrder(XElement styles, string styleId)
{
    HashSet<string> visited = new HashSet<string>();
    string current = styleId;

    // visited.Add returns false on a repeated id, which breaks basedOn cycles.
    while (current != null && visited.Add(current))
    {
        XElement style = styles.Elements(W.style)
            .Where(s => (string)s.Attribute(W.styleId) == current)
            .FirstOrDefault();
        if (style == null)
            yield break; // dangling reference: stop instead of yielding null

        yield return style;
        current = (string)style.Elements(W.basedOn).Attributes(W.val).FirstOrDefault();
    }
}
/// <summary>
/// Returns the <c>w:basedOn</c> chain of a style ordered from the root ancestor down
/// to the style itself, i.e. the reverse of <see cref="StyleChainReverseOrder"/>.
/// </summary>
/// <param name="styles">The root element of the styles part.</param>
/// <param name="styleId">The id of the style whose chain is requested.</param>
/// <returns>The style elements of the chain, least derived first. Lazily evaluated.</returns>
public static IEnumerable<XElement> StyleChain(XElement styles, string styleId)
{
    IEnumerable<XElement> derivedFirst = StyleChainReverseOrder(styles, styleId);
    return Enumerable.Reverse(derivedFirst);
}
/// <summary>
/// Rolls up a style with all of its ancestors: the styles returned by
/// <see cref="StyleChain"/> are merged one after another with
/// <see cref="MergeChildElements"/>, starting from an empty <c>w:style</c> element
/// that only carries the <c>w</c> namespace declaration.
/// </summary>
/// <param name="styles">The root element of the styles part.</param>
/// <param name="styleId">The id of the style to roll up.</param>
/// <returns>The merged <c>w:style</c> element.</returns>
private static XElement AssembleStyleInformation(XElement styles, string styleId)
{
    IEnumerable<XElement> ancestorsFirst = StyleChain(styles, styleId);
    XElement emptyStyle = new XElement(
        W.style,
        new XAttribute(XNamespace.Xmlns + "w", W.w));

    return ancestorsFirst.Aggregate(emptyStyle, MergeChildElements);
}
// Names of the elements that reference a style (paragraph style and run style).
// NOTE(review): this array is not referenced anywhere in the visible part of this
// file; in particular IsValidMergeElement does not consult it.
public static XName[] Others =
{
W.pStyle,
W.rStyle
};
// Container elements that are merged child by child rather than replaced as a whole.
// Consulted by IsValidMergeElement and HasMergeElementsSemantics.
public static XName[] ElementsWithMergeElementsSemantics =
{
W.style,
W.rPr,
W.pPr
};
// Elements that are merged attribute by attribute: MergeChildElements keeps every
// attribute of the overriding element and adds those attributes of the already
// merged element that the overriding element does not define.
// Consulted by IsValidMergeElement and HasMergeAttributesSemantics.
public static XName[] ElementsWithMergeAttributesSemantics =
{
W.ind,
W.spacing,
W.lang
};
// Elements that are replaced as a whole: when the overriding element contains one of
// these, MergeChildElements takes it as is and drops the same-named element from the
// already merged element.
// Consulted by IsValidMergeElement and HasReplaceSemantics.
// Note: W.shd and W.snapToGrid are listed twice below; this is harmless because the
// array is only used for membership tests.
public static XName[] ElementsWithReplaceElementsSemantics =
{
W.name, // The style Name element
W.adjustRightInd,
W.autoSpaceDE,
W.autoSpaceDN,
W.bidi,
W.cnfStyle, // within a table
W.contextualSpacing,
W.divId,
W.framePr,
W.jc,
W.keepLines,
W.keepNext,
W.kinsoku,
W.mirrorIndents,
W.numPr,
W.outlineLvl,
W.overflowPunct,
W.pageBreakBefore,
W.pBdr,
W.shd,
W.snapToGrid,
W.suppressAutoHyphens,
W.suppressLineNumbers,
W.suppressOverlap,
W.tabs,
W.textAlignment,
W.textboxTightWrap, // within a textbox
W.textDirection,
W.topLinePunct,
W.widowControl,
W.wordWrap,
W.b,
W.bCs,
W.bdr,
W.caps,
W.color,
W.cs,
W.dstrike,
W.eastAsianLayout,
W.effect,
W.em,
W.emboss,
W.fitText,
W.highlight,
W.i,
W.iCs,
W.imprint,
W.kern,
W.noProof,
W.oMath,
W.outline,
W.position,
W.rFonts,
W.rtl,
W.shadow,
W.shd,
W.smallCaps,
W.snapToGrid,
// W.spacing is deliberately not listed: it is in ElementsWithMergeAttributesSemantics,
// and MergeChildElements special-cases it to replace semantics inside w:rPr.
//W.spacing, // different from paragraph spacing
W.specVanish,
W.strike,
W.sz,
W.szCs,
W.u,
W.vanish,
W.vertAlign,
W._w,
W.webHidden
};
// Run properties that are handled as toggle properties. AssembleToggleProperties
// iterates over this list to resolve each of them across the document defaults,
// the paragraph style and the run style; IsToggleProperty tests membership.
public static XName[] toggleProperties =
{
W.b,
W.bCs,
W.caps,
W.emboss,
W.i,
W.iCs,
W.imprint,
W.outline,
W.shadow,
W.smallCaps,
W.strike,
W.vanish
};
/// <summary>
/// Tells whether an element name takes part in merging at all, i.e. whether it is
/// listed in any of the three semantics tables (merge-attributes, merge-elements
/// or replace).
/// </summary>
/// <param name="name">The element name to test.</param>
/// <returns><c>true</c> when the name is in at least one table; otherwise <c>false</c>.</returns>
public static bool IsValidMergeElement(XName name)
{
    return ElementsWithMergeAttributesSemantics.Contains(name)
        || ElementsWithMergeElementsSemantics.Contains(name)
        || ElementsWithReplaceElementsSemantics.Contains(name);
}
/// <summary>
/// Tells whether an element name is listed in <c>toggleProperties</c>.
/// </summary>
/// <param name="name">The element name to test.</param>
/// <returns><c>true</c> when the name is a toggle property; otherwise <c>false</c>.</returns>
public static bool IsToggleProperty(XName name)
{
    return toggleProperties.Contains(name);
}
/// <summary>
/// Tells whether an element name is listed in <c>ElementsWithReplaceElementsSemantics</c>.
/// </summary>
/// <param name="name">The element name to test.</param>
/// <returns><c>true</c> when the element is replaced as a whole during merging; otherwise <c>false</c>.</returns>
public static bool HasReplaceSemantics(XName name)
{
    return ElementsWithReplaceElementsSemantics.Contains(name);
}
/// <summary>
/// Tells whether an element name is listed in <c>ElementsWithMergeElementsSemantics</c>.
/// </summary>
/// <param name="name">The element name to test.</param>
/// <returns><c>true</c> when the name is in that table; otherwise <c>false</c>.</returns>
public static bool HasMergeElementsSemantics(XName name)
{
    return ElementsWithMergeElementsSemantics.Contains(name);
}
/// <summary>
/// Tells whether an element name is listed in <c>ElementsWithMergeAttributesSemantics</c>.
/// </summary>
/// <param name="name">The element name to test.</param>
/// <returns><c>true</c> when the element is merged attribute by attribute; otherwise <c>false</c>.</returns>
public static bool HasMergeAttributesSemantics(XName name)
{
    return ElementsWithMergeAttributesSemantics.Contains(name);
}
/// <summary>
/// Merges <paramref name="element"/> (the overriding layer) on top of
/// <paramref name="mergedElement"/> (what has been accumulated so far) and returns
/// the result as a new element. Neither input is modified.
/// </summary>
/// <remarks>
/// When only one argument is non-null, the result is a filtered copy of it: nested
/// <c>w:rPr</c>/<c>w:pPr</c> children are copied recursively, other children are kept
/// only if <see cref="IsValidMergeElement"/> accepts their name.
/// When both are non-null, each child of <paramref name="element"/> is handled by its
/// semantics (replace, merge attributes, or recursive merge for <c>w:rPr</c>/<c>w:pPr</c>;
/// anything else is dropped), and children of <paramref name="mergedElement"/> whose
/// name does not occur in <paramref name="element"/> are carried over.
/// On a <c>w:style</c> element only the <c>w:type</c> and <c>w:styleId</c> attributes
/// are retained; namespace declarations are always replaced by a single <c>w</c> one.
/// </remarks>
/// <param name="mergedElement">The accumulated element; may be <c>null</c>.</param>
/// <param name="element">The overriding element; may be <c>null</c>.</param>
/// <returns>The merged element, or <c>null</c> when both arguments are <c>null</c>.</returns>
public static XElement MergeChildElements(XElement mergedElement, XElement element)
{
    // Nothing to merge at all.
    if (mergedElement == null && element == null)
        return null;

    if (mergedElement == null || element == null)
    {
        // Only one side exists: produce a filtered copy of it.
        XElement source = element ?? mergedElement;
        return new XElement(source.Name,
            new XAttribute(XNamespace.Xmlns + "w", W.w),
            GetMergeableAttributes(source),
            source.Elements().Select(e =>
            {
                if (e.Name == W.rPr || e.Name == W.pPr)
                    return MergeChildElements(null, e);
                if (IsValidMergeElement(e.Name))
                    return e;
                return null;
            }));
    }

    return new XElement(element.Name,
        new XAttribute(XNamespace.Xmlns + "w", W.w),
        GetMergeableAttributes(element),
        element.Elements().Select(e =>
        {
            if (HasReplaceSemantics(e.Name))
                return e;
            // spacing within run properties has replace semantics
            if (element.Name == W.rPr && e.Name == W.spacing)
                return e;
            if (HasMergeAttributesSemantics(e.Name))
            {
                // Attributes of the overriding element win; the accumulated element
                // only contributes attributes the overriding one does not define.
                return new XElement(e.Name,
                    e.Attributes(),
                    mergedElement.Elements(e.Name).Attributes()
                        .Where(a =>
                            !(e.Attributes().Any(z => z.Name == a.Name))));
            }
            if (e.Name == W.rPr || e.Name == W.pPr)
            {
                XElement correspondingElement = mergedElement.Element(e.Name);
                return MergeChildElements(correspondingElement, e);
            }
            return null;
        }),
        // Carry over accumulated children that the overriding element does not mention.
        mergedElement.Elements()
            .Where(m => !element.Elements(m.Name).Any()));
}

/// <summary>
/// Selects the attributes of an element that survive a merge: namespace declarations
/// are dropped, and on a <c>w:style</c> element only <c>w:type</c> and
/// <c>w:styleId</c> are kept.
/// </summary>
private static IEnumerable<XAttribute> GetMergeableAttributes(XElement element)
{
    return element.Attributes()
        .Where(a =>
        {
            if (a.IsNamespaceDeclaration)
                return false;
            if (element.Name == W.style)
                if (!(a.Name == W.type || a.Name == W.styleId))
                    return false;
            return true;
        });
}
}
}