OpenXML заменить текст во всем документе

Question

OpenXML заменить текст во всем документе

У меня есть фрагмент кода ниже. Я хочу заменить текст "Text1" на "NewText", и это работает. Но когда я помещаю текст "Text1" в таблицу, замена для "Text1" внутри таблицы больше не срабатывает.

Я хотел бы сделать эту замену во всем документе.

// Opens the document in editable mode (second argument `true`) and replaces the
// "##Text1##" placeholder with "NewText" in the text of the body's paragraphs.
// NOTE(review): the path literal looks like it lost its backslashes
// (presumably c:\temp\filename.docx) — confirm against the real file location.
using (WordprocessingDocument doc = WordprocessingDocument.Open(String.Format("c:tempfilename.docx"), true))
{
    var body = doc.MainDocumentPart.Document.Body;

    // Elements<Paragraph>() yields only the paragraphs that are direct children of the body.
    // Paragraphs nested inside tables are never visited by this loop, which is why
    // placeholders placed in a table are left untouched.
    foreach (var para in body.Elements<Paragraph>())
    {
        // Likewise, only runs that are direct children of the paragraph are inspected.
        foreach (var run in para.Elements<Run>())
        {
            foreach (var text in run.Elements<Text>())
            {
                // The placeholder is only found when it sits entirely inside a single Text element.
                if (text.Text.Contains("##Text1##"))
                    text.Text = text.Text.Replace("##Text1##", "NewText");
            }
        }
    }
}

8

.net c# ms-word openxml openxml-sdk

автор: Deduplicator

3 ответа

автор: Hans · Accepted Answer · 2013-09-30 17:23:56

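Ваш код не работает, потому что элемент таблицы (w:tbl) не содержится в элементе абзаца (w:p). Метод body.Elements<Paragraph>() возвращает только абзацы, являющиеся непосредственными дочерними элементами тела документа, а абзацы внутри ячеек таблицы в их число не входят. Дополнительную информацию см. в соответствующей статье MSDN.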

Класс Text (элемент w:t) обычно представляет буквальный текст внутри элемента Run в документе Word. Поэтому можно просто найти все элементы w:t (класс Text) и заменить тег, если текстовый элемент (w:t) его содержит:

// Opens the document in editable mode and swaps the placeholder in every text
// element of the main document body, wherever that element is nested.
using (WordprocessingDocument package = WordprocessingDocument.Open("yourdoc.docx", true))
{
  const string placeholder = "##Text1##";
  const string replacement = "NewText";

  // Descendants<Text>() walks the whole body subtree, so w:t elements inside
  // tables are visited as well as those in top-level paragraphs.
  foreach (Text node in package.MainDocumentPart.Document.Body.Descendants<Text>())
  {
    if (!node.Text.Contains(placeholder))
    {
      continue; // leave text elements without the placeholder untouched
    }

    node.Text = node.Text.Replace(placeholder, replacement);
  }
}

автор: Amos Zoellner · Accepted Answer · 2015-05-06 15:50:00

Я опирался на некоторые другие ответы из разных источников и исходил из того, что нужно преодолеть четыре основных препятствия:

удаление из строки замены любых символов Юникода из верхних диапазонов, которые Word не может прочитать (например, из-за некорректного пользовательского ввода);
возможность находить искомую строку, разбитую на несколько прогонов (Run) или текстовых элементов абзаца (Word часто разбивает одно предложение на несколько текстовых прогонов);
возможность включать разрывы строк в текст замены, чтобы вставлять в документ многострочный текст;
возможность передать любой узел в качестве отправной точки поиска, чтобы ограничить поиск этой частью документа (например, телом, верхним или нижним колонтитулом, определённой таблицей, строкой таблицы или ячейкой таблицы).

Я уверен, что расширенные сценарии — например, закладки или сложная вложенность — потребуют дополнительных доработок, но этот код работает для основных типов документов Word, с которыми я сталкивался, и для меня он гораздо полезнее, чем полностью игнорировать прогоны (Run) или применять регулярное выражение ко всему файлу без возможности нацелиться на определённую ячейку таблицы или часть документа (что бывает нужно в расширенных сценариях).

Пример Использования:

 // Take the main document body as the search root and run the find/replace over it.
 // `document`, `replace` and `with` come from the caller; presumably `replace` is the
 // search string and `with` the replacement text, matching the (find, replaceWith)
 // parameter order of ReplaceText — confirm against the caller.
 var body = document.MainDocumentPart.Document.Body;
 ReplaceText(body, replace, with);

код:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

namespace My.Web.Api.OpenXml
{
    /// <summary>
    /// Find/replace helpers for WordprocessingML content. A search string is matched even
    /// when it is split across several <see cref="Text"/> elements of one paragraph, and the
    /// replacement may contain line breaks, which are emitted as <see cref="Break"/> elements.
    /// </summary>
    public static class WordTools
    {
        // Every line-ending convention, longest first so that "\r\n" counts as ONE break
        // regardless of the operating system the code runs on.
        private static readonly string[] LineSeparators = { "\r\n", "\r", "\n" };

        /// <summary>
        /// Find/replace within every paragraph at or below the specified element
        /// (for example a body, header, footer, table, table row or table cell).
        /// </summary>
        /// <param name="element">Root of the search; a <see cref="Paragraph"/> is processed itself.</param>
        /// <param name="find">Text to search for; null or empty means there is nothing to do.</param>
        /// <param name="replaceWith">Replacement text; may contain line breaks, null is treated as empty.</param>
        /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
        public static void ReplaceText(OpenXmlElement element, string find, string replaceWith)
        {
            if (element == null)
            {
                throw new ArgumentNullException("element");
            }

            var self = element as Paragraph;
            if (self != null)
            {
                ReplaceText(self, find, replaceWith);
                return;
            }

            // Snapshot the paragraphs first: the replacement inserts new elements into the tree,
            // which must not happen while the lazy Descendants enumeration is still running.
            var processed = new HashSet<Paragraph>();
            foreach (Paragraph paragraph in element.Descendants<Paragraph>().ToList())
            {
                // A paragraph nested in an already processed one (e.g. text box content) was
                // covered by the outer pass, because that pass searches all descendant texts.
                if (paragraph.Ancestors<Paragraph>().Any(processed.Contains))
                {
                    continue;
                }

                ReplaceText(paragraph, find, replaceWith);
                processed.Add(paragraph);
            }
        }

        /// <summary>
        /// Find/replace within the specified paragraph.
        /// </summary>
        /// <param name="paragraph">Paragraph whose descendant text elements are searched.</param>
        /// <param name="find">Text to search for; null or empty means there is nothing to do.</param>
        /// <param name="replaceWith">
        /// Replacement text, inserted as-is. Each "\r\n", "\r" or "\n" starts a new line in the
        /// document. Null is treated as an empty string.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="paragraph"/> is null.</exception>
        public static void ReplaceText(Paragraph paragraph, string find, string replaceWith)
        {
            if (paragraph == null)
            {
                throw new ArgumentNullException("paragraph");
            }

            if (string.IsNullOrEmpty(find))
            {
                return; // an empty search string can never match
            }

            replaceWith = replaceWith ?? string.Empty;

            // Materialize once: Descendants is lazy, so Count()/ElementAt() on it would re-walk
            // the paragraph for every character. The list is kept in document order by hand
            // whenever new text elements are inserted below.
            List<Text> texts = paragraph.Descendants<Text>().ToList();
            string[] lines = replaceWith.Split(LineSeparators, StringSplitOptions.None);
            int lastLineLength = lines[lines.Length - 1].Length;

            for (int t = 0; t < texts.Count; t++)
            {   // figure out which Text element within the paragraph contains the starting point of the search string
                Text txt = texts[t];
                for (int c = 0; c < txt.Text.Length; c++)
                {
                    Match match = IsMatch(texts, t, c, find);
                    if (match == null)
                    {
                        continue;
                    }

                    // Capture what surrounds the match before any element is modified.
                    string prefix = txt.Text.Substring(0, c);
                    string suffix = texts[match.EndElementIndex].Text.Substring(match.EndCharIndex + 1);

                    // The matched part of every following element moves into txt (or is dropped).
                    for (int i = t + 1; i <= match.EndElementIndex; i++)
                    {
                        texts[i].Text = string.Empty;
                    }

                    PreserveWhitespace(txt); // in case the value starts/ends with whitespace
                    if (lines.Length == 1)
                    {
                        txt.Text = prefix + lines[0] + suffix;

                        // Resume right behind the replacement (the loop's c++ adds the final step),
                        // so replaced text is never searched again.
                        c = prefix.Length + lastLineLength - 1;
                        continue;
                    }

                    txt.Text = prefix + lines[0];

                    // The replacement contained line breaks: append "<w:br/><w:t>line</w:t>" per extra line.
                    for (int i = 1; i < lines.Length; i++)
                    {
                        string content = lines[i];
                        if (i == lines.Length - 1)
                        {
                            content += suffix; // text that followed the match ends up on the last line
                        }

                        Break lineBreak = txt.InsertAfterSelf(new Break());
                        txt = lineBreak.InsertAfterSelf(new Text(content));
                        PreserveWhitespace(txt);

                        // Keep the snapshot in document order and continue scanning in the new element.
                        texts.Insert(++t, txt);
                    }

                    c = lastLineLength - 1; // continue behind the replacement in the last inserted line
                }
            }
        }

        /// <summary>
        /// Marks a text element with xml:space="preserve" so leading/trailing whitespace is kept.
        /// </summary>
        /// <param name="text">Text element to mark.</param>
        private static void PreserveWhitespace(Text text)
        {
            text.Space = new EnumValue<SpaceProcessingModeValues>(SpaceProcessingModeValues.Preserve);
        }

        /// <summary>
        /// Determine if the texts (starting at element t, char c) exactly contain the find text
        /// </summary>
        /// <param name="texts">Text elements in document order.</param>
        /// <param name="t">Index of the element in which the candidate match starts.</param>
        /// <param name="c">Character index inside that element at which the candidate match starts.</param>
        /// <param name="find">Text to search for.</param>
        /// <returns>null or the result info</returns>
        private static Match IsMatch(IList<Text> texts, int t, int c, string find)
        {
            if (string.IsNullOrEmpty(find))
            {
                return null; // nothing to match; avoids indexing into an empty string
            }

            int ix = 0;
            for (int i = t; i < texts.Count; i++)
            {
                string current = texts[i].Text;
                for (int j = c; j < current.Length; j++)
                {
                    if (find[ix] != current[j])
                    {
                        return null; // element mismatch
                    }

                    ix++; // match; go to next character
                    if (ix == find.Length)
                    {
                        return new Match { EndElementIndex = i, EndCharIndex = j }; // full match with no issues
                    }
                }

                c = 0; // reset char index for next text element
            }

            return null; // ran out of text, not a string match
        }

        /// <summary>
        /// Defines a match result
        /// </summary>
        private sealed class Match
        {
            /// <summary>
            /// Last matching element index containing part of the search text
            /// </summary>
            public int EndElementIndex { get; set; }

            /// <summary>
            /// Last matching char index of the search text in last matching element
            /// </summary>
            public int EndCharIndex { get; set; }
        }

    }   // class
}  // namespace


/// <summary>
/// Helpers for sanitizing strings before they are written into Open XML content.
/// </summary>
public static class OpenXmlTools
{
    // Matches three kinds of characters: a low surrogate that is not preceded by a high
    // surrogate, a high surrogate that is not followed by a low surrogate, and the listed
    // control/non-characters (tab, line feed and carriage return are NOT in the list, so they
    // are kept). Properly formed surrogate pairs are left alone.
    // readonly: the compiled pattern is shared by all callers and must never be reassigned.
    private static readonly Regex _invalidXMLChars = new Regex(
        @"(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F\uFEFF\uFFFE\uFFFF]",
        RegexOptions.Compiled);

    /// <summary>
    /// removes any unusual unicode characters that can't be encoded into XML which give exception on save
    /// </summary>
    /// <param name="text">Text to sanitize; may be null.</param>
    /// <returns>
    /// <paramref name="text"/> without the filtered characters, or an empty string when
    /// <paramref name="text"/> is null or empty.
    /// </returns>
    public static string RemoveInvalidXMLChars(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return "";
        }

        return _invalidXMLChars.Replace(text, "");
    }
}

автор: Emanuele Greco · Accepted Answer · 2015-03-31 10:46:23

возможно, это решение проще

using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(document, true))
{
 string docText = null;
 //1. Copy all the file into a string
 using (StreamReader sr = new StreamReader(wordDoc.MainDocumentPart.GetStream()))
     docText = sr.ReadToEnd();

 //2. Use regular expression to replace all text
 Regex regexText = new Regex(find);
 docText = regexText.Replace(docText, replace);

 //3. Write the changed string into the file again
 using (StreamWriter sw = new StreamWriter(wordDoc.MainDocumentPart.GetStream(FileMode.Create)))
      sw.Write(docText);