From 8589861a73b4b8885fda0256f2d6b57c91b84531 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Tue, 28 Jan 2025 15:56:19 +1300 Subject: [PATCH 1/4] SF-3184 Add USJ conversion support to and from USX --- src/SIL.Converters.Usj/IUsj.cs | 27 ++ .../LowerCaseNamingStrategy.cs | 13 + .../SIL.Converters.Usj.csproj | 11 + src/SIL.Converters.Usj/Usj.cs | 24 ++ src/SIL.Converters.Usj/UsjBase.cs | 39 ++ src/SIL.Converters.Usj/UsjContentConverter.cs | 61 +++ src/SIL.Converters.Usj/UsjMarker.cs | 68 ++++ src/SIL.Converters.Usj/UsjToUsx.cs | 201 ++++++++++ src/SIL.Converters.Usj/Usx.cs | 19 + src/SIL.Converters.Usj/UsxToUsj.cs | 178 +++++++++ .../SIL.Converters.Usj.Tests.csproj | 27 ++ test/SIL.Converters.Usj.Tests/TestData.cs | 358 ++++++++++++++++++ .../UsjContentConverterTests.cs | 22 ++ .../SIL.Converters.Usj.Tests/UsjToUsxTests.cs | 77 ++++ .../SIL.Converters.Usj.Tests/UsxToUsjTests.cs | 80 ++++ xForge.sln | 14 + 16 files changed, 1219 insertions(+) create mode 100644 src/SIL.Converters.Usj/IUsj.cs create mode 100644 src/SIL.Converters.Usj/LowerCaseNamingStrategy.cs create mode 100644 src/SIL.Converters.Usj/SIL.Converters.Usj.csproj create mode 100644 src/SIL.Converters.Usj/Usj.cs create mode 100644 src/SIL.Converters.Usj/UsjBase.cs create mode 100644 src/SIL.Converters.Usj/UsjContentConverter.cs create mode 100644 src/SIL.Converters.Usj/UsjMarker.cs create mode 100644 src/SIL.Converters.Usj/UsjToUsx.cs create mode 100644 src/SIL.Converters.Usj/Usx.cs create mode 100644 src/SIL.Converters.Usj/UsxToUsj.cs create mode 100644 test/SIL.Converters.Usj.Tests/SIL.Converters.Usj.Tests.csproj create mode 100644 test/SIL.Converters.Usj.Tests/TestData.cs create mode 100644 test/SIL.Converters.Usj.Tests/UsjContentConverterTests.cs create mode 100644 test/SIL.Converters.Usj.Tests/UsjToUsxTests.cs create mode 100644 test/SIL.Converters.Usj.Tests/UsxToUsjTests.cs diff --git a/src/SIL.Converters.Usj/IUsj.cs b/src/SIL.Converters.Usj/IUsj.cs new file mode 100644 index 0000000000..8e01de8107 --- /dev/null +++ b/src/SIL.Converters.Usj/IUsj.cs @@ -0,0 +1,27 @@ +using System.Collections; + +namespace SIL.Converters.Usj +{ + /// + /// An interface for Unified Scripture JSON (USJ) types. + /// + public interface IUsj + { + /// + /// The JSON representation of scripture contents from USFM/USX. + /// + /// This will either be a or . + /// Nullable. The contents will be laid out in order. + ArrayList Content { get; set; } + + /// + /// The USJ spec type. + /// + string Type { get; set; } + + /// + /// The USJ spec version. + /// + string Version { get; set; } + } +} diff --git a/src/SIL.Converters.Usj/LowerCaseNamingStrategy.cs b/src/SIL.Converters.Usj/LowerCaseNamingStrategy.cs new file mode 100644 index 0000000000..d8ac0f6c61 --- /dev/null +++ b/src/SIL.Converters.Usj/LowerCaseNamingStrategy.cs @@ -0,0 +1,13 @@ +using Newtonsoft.Json.Serialization; + +namespace SIL.Converters.Usj +{ + /// + /// Ensures that the JSON properties for the USJ data model are in lower case. + /// + internal class LowerCaseNamingStrategy : NamingStrategy + { + /// + protected override string ResolvePropertyName(string name) => name.ToLowerInvariant(); + } +} diff --git a/src/SIL.Converters.Usj/SIL.Converters.Usj.csproj b/src/SIL.Converters.Usj/SIL.Converters.Usj.csproj new file mode 100644 index 0000000000..e2d75c1902 --- /dev/null +++ b/src/SIL.Converters.Usj/SIL.Converters.Usj.csproj @@ -0,0 +1,11 @@ + + + + netstandard2.0 + + + + + + + diff --git a/src/SIL.Converters.Usj/Usj.cs b/src/SIL.Converters.Usj/Usj.cs new file mode 100644 index 0000000000..719bc7e997 --- /dev/null +++ b/src/SIL.Converters.Usj/Usj.cs @@ -0,0 +1,24 @@ +namespace SIL.Converters.Usj +{ + /// + /// Unified Scripture JSON (USJ) - The JSON variant of USFM and USX data models. + /// These types follow this schema: https://github.com/usfm-bible/tcdocs/blob/usj/grammar/usj.js + /// + public class Usj : UsjBase, IUsj + { + /// + /// The supported USJ spec type. + /// + public const string UsjType = "USJ"; + + /// + /// The supported USJ spec version. + /// + public const string UsjVersion = "3.1"; + + /// + /// The USJ spec version. + /// + public string Version { get; set; } + } +} diff --git a/src/SIL.Converters.Usj/UsjBase.cs b/src/SIL.Converters.Usj/UsjBase.cs new file mode 100644 index 0000000000..d5687850d7 --- /dev/null +++ b/src/SIL.Converters.Usj/UsjBase.cs @@ -0,0 +1,39 @@ +using System.Collections; +using System.Collections.Generic; +using Newtonsoft.Json; + +namespace SIL.Converters.Usj +{ + /// + /// Elements shared between and . + /// + [JsonObject(NamingStrategyType = typeof(LowerCaseNamingStrategy), ItemNullValueHandling = NullValueHandling.Ignore)] + public abstract class UsjBase + { + /// + /// For , this is the USJ spec type. + /// For , this is the kind/category of node or element this is, + /// corresponding the USFM marker and USX node. + /// + /// para, verse, char. + public string Type { get; set; } + + /// + /// The JSON representation of scripture contents from USFM/USX. + /// + /// This will either be a or . + /// Nullable. The contents will be laid out in order. + [JsonConverter(typeof(UsjContentConverter))] + public ArrayList Content { get; set; } + + /// + /// Additional attributes that are not a part of the USJ specification. + /// This is only used for . + /// + /// + /// These are typically closed, colspan, etc. + /// + [JsonExtensionData] + public Dictionary AdditionalData { get; } = new Dictionary(); + } +} diff --git a/src/SIL.Converters.Usj/UsjContentConverter.cs b/src/SIL.Converters.Usj/UsjContentConverter.cs new file mode 100644 index 0000000000..76993559cb --- /dev/null +++ b/src/SIL.Converters.Usj/UsjContentConverter.cs @@ -0,0 +1,61 @@ +using System; +using System.Collections; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; + +namespace SIL.Converters.Usj +{ + /// + /// Converts the contents of the Content to and from JSON, preserving + /// the two supported content types: and . + /// + public class UsjContentConverter : JsonConverter + { + /// + public override ArrayList ReadJson( + JsonReader reader, + Type objectType, + ArrayList existingValue, + bool hasExistingValue, + JsonSerializer serializer + ) + { + var jArray = JArray.Load(reader); + var list = new ArrayList(); + + foreach (JToken item in jArray) + { + if (item.Type == JTokenType.String) + { + list.Add(item.ToString()); + } + else + { + var usjMarker = item.ToObject(); + list.Add(usjMarker); + } + } + + return list; + } + + /// + public override void WriteJson(JsonWriter writer, ArrayList value, JsonSerializer serializer) + { + JArray jArray = new JArray(); + foreach (object item in value) + { + if (item is string str) + { + jArray.Add(str); + } + else if (item is UsjMarker usjMarker) + { + jArray.Add(JToken.FromObject(usjMarker)); + } + } + + jArray.WriteTo(writer); + } + } +} diff --git a/src/SIL.Converters.Usj/UsjMarker.cs b/src/SIL.Converters.Usj/UsjMarker.cs new file mode 100644 index 0000000000..a6b6486bf0 --- /dev/null +++ b/src/SIL.Converters.Usj/UsjMarker.cs @@ -0,0 +1,68 @@ +namespace SIL.Converters.Usj +{ + /// + /// A Scripture Marker and its contents. + /// + public class UsjMarker : UsjBase + { + /// + /// The corresponding marker in USFM or style in USX. + /// + /// p, v, nd. + public string Marker { get; set; } + + /// + /// Indicates the Book-chapter-verse value in the paragraph based structure. + /// + /// Nullable. + public string Sid { get; set; } + + /// + /// Milestone end ID, matches start ID (not currently included in USJ spec). + /// + /// Nullable. + public string Eid { get; set; } + + /// + /// Chapter number or verse number. + /// + /// Nullable. + public string Number { get; set; } + + /// + /// The 3-letter book code in ID element. + /// + /// Nullable. + public string Code { get; set; } + + /// + /// Alternate chapter number or verse number. + /// + /// Nullable. + public string AltNumber { get; set; } + + /// + /// Published character of chapter or verse. + /// + /// Nullable. + public string PubNumber { get; set; } + + /// + /// Caller character for footnotes and cross-refs. + /// + /// Nullable. + public string Caller { get; set; } + + /// + /// Alignment of table cells. + /// + /// Nullable. + public string Align { get; set; } + + /// + /// Category of extended study bible sections. + /// + /// Nullable. + public string Category { get; set; } + } +} diff --git a/src/SIL.Converters.Usj/UsjToUsx.cs b/src/SIL.Converters.Usj/UsjToUsx.cs new file mode 100644 index 0000000000..52fd915c0f --- /dev/null +++ b/src/SIL.Converters.Usj/UsjToUsx.cs @@ -0,0 +1,201 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Xml; + +namespace SIL.Converters.Usj +{ + /// + /// Convert Scripture from USJ to USX. + /// + /// + /// Ported to C# from this file: + /// https://github.com/BiblioNexus-Foundation/scripture-editors/blob/main/packages/utilities/src/converters/usj/usj-to-usx.ts + /// + public class UsjToUsx + { + private string _chapterEid; + private string _verseEid; + + private XmlElement CreateVerseEndElement(XmlDocument usxDoc) + { + XmlElement eidElement = usxDoc.CreateElement("verse"); + eidElement.SetAttribute("eid", _verseEid); + return eidElement; + } + + private XmlElement CreateChapterEndElement(XmlDocument usxDoc) + { + XmlElement eidElement = usxDoc.CreateElement("chapter"); + eidElement.SetAttribute("eid", _chapterEid); + return eidElement; + } + + private static void SetAttributes(XmlElement element, UsjMarker markerContent) + { + // Get the standard attributes + // This order is based on the order in ParatextData's UsxUsfmParserSink + var attributes = new Dictionary + { + { "code", markerContent.Code }, + { "caller", markerContent.Caller }, + { "number", markerContent.Number }, + { markerContent.Type == "unmatched" ? "marker" : "style", markerContent.Marker }, + { "altnumber", markerContent.AltNumber }, + { "pubnumber", markerContent.PubNumber }, + { "align", markerContent.Align }, + { "category", markerContent.Category }, + { "sid", markerContent.Sid }, + { "eid", markerContent.Eid }, + }; + + // Add the standard attributes + foreach (var kvp in attributes.Where(kvp => kvp.Value != null)) + { + element.SetAttribute(kvp.Key, kvp.Value); + } + + // Add the non-standard attributes + foreach (KeyValuePair nonStandardAttribute in markerContent.AdditionalData) + { + string key = nonStandardAttribute.Key; + if (nonStandardAttribute.Value is string value && key != "type" && key != "marker" && key != "content") + { + element.SetAttribute(key, value); + } + } + } + + private void ConvertUsjRecurse( + object markerContent, + XmlElement parentElement, + XmlDocument usxDoc, + bool isLastItem + ) + { + XmlNode node; + string type = null; + XmlElement eidElement = null; + UsjMarker usjMarker = markerContent as UsjMarker; + if (markerContent is string markerText) + { + node = usxDoc.CreateTextNode(markerText); + } + else if (usjMarker != null) + { + type = usjMarker.Type.Replace("table:", string.Empty); + XmlElement element = usxDoc.CreateElement(type); + node = element; + SetAttributes(element, usjMarker); + if (usjMarker.Content != null) + { + for (int i = 0; i < usjMarker.Content.Count; i++) + { + bool elementIsLastItem = i == usjMarker.Content.Count - 1; + ConvertUsjRecurse(usjMarker.Content[i], element, usxDoc, elementIsLastItem); + } + } + } + else + { + throw new ArgumentOutOfRangeException(nameof(markerContent)); + } + + // Create chapter and verse end elements from SID attributes. + if (_verseEid != null && (type == "verse" || (parentElement.Name == "para" && isLastItem))) + { + eidElement = CreateVerseEndElement(usxDoc); + _verseEid = null; + } + + if (type == "verse" && usjMarker?.Sid != null) + { + _verseEid = usjMarker.Sid; + } + + if (_chapterEid != null && (type == "chapter" || (type == "para" && isLastItem))) + { + eidElement = CreateChapterEndElement(usxDoc); + _chapterEid = null; + } + + if (type == "chapter" && usjMarker?.Sid != null) + { + _chapterEid = usjMarker.Sid; + } + + // Append to parent. + if (eidElement != null && !isLastItem) + { + parentElement.AppendChild(eidElement); + } + + parentElement.AppendChild(node); + + if (eidElement != null && isLastItem) + { + parentElement.AppendChild(eidElement); + } + + // Allow for final chapter and verse end elements at the end of an implied para. + if (isLastItem && parentElement.Name == Usx.UsxType) + { + if (_verseEid != null) + { + parentElement.AppendChild(CreateVerseEndElement(usxDoc)); + } + + if (_chapterEid != null) + { + parentElement.AppendChild(CreateChapterEndElement(usxDoc)); + } + + _verseEid = null; + _chapterEid = null; + } + } + + private void UsjToUsxDom(IUsj usj, XmlDocument usxDoc) + { + for (int i = 0; i < usj.Content.Count; i++) + { + bool isLastItem = i == usj.Content.Count - 1; + ConvertUsjRecurse(usj.Content[i], usxDoc.DocumentElement, usxDoc, isLastItem); + } + } + + /// + /// Converts a USJ object to a USX string. + /// + /// The USJ object. + /// The USX as a string. + public string UsjToUsxString(IUsj usj) + { + XmlDocument usxDoc = new XmlDocument { PreserveWhitespace = true }; + XmlElement documentElement = usxDoc.CreateElement(Usx.UsxType); + documentElement.SetAttribute("version", Usx.UsxVersion); + usxDoc.AppendChild(documentElement); + UsjToUsxDom(usj, usxDoc); + + using (StringWriter stringWriter = new StringWriter()) + { + // These settings conform to ParatextData.UsfmToUsx + XmlWriterSettings xmlWriterSettings = new XmlWriterSettings + { + Indent = true, + Encoding = Encoding.UTF8, + OmitXmlDeclaration = true, + NewLineChars = "\r\n", + }; + using (XmlWriter xmlWriter = XmlWriter.Create(stringWriter, xmlWriterSettings)) + { + usxDoc.WriteTo(xmlWriter); + xmlWriter.Flush(); + return stringWriter.ToString(); + } + } + } + } +} diff --git a/src/SIL.Converters.Usj/Usx.cs b/src/SIL.Converters.Usj/Usx.cs new file mode 100644 index 0000000000..ab8036a7cd --- /dev/null +++ b/src/SIL.Converters.Usj/Usx.cs @@ -0,0 +1,19 @@ +namespace SIL.Converters.Usj +{ + /// + /// Unified Scripture XML (USX). + /// These types follow this schema: https://github.com/usfm-bible/tcdocs/blob/main/grammar/usx.rng + /// + public static class Usx + { + /// + /// The USX spec type. + /// + public const string UsxType = "usx"; + + /// + /// The USX spec version. + /// + public const string UsxVersion = "3.0"; + } +} diff --git a/src/SIL.Converters.Usj/UsxToUsj.cs b/src/SIL.Converters.Usj/UsxToUsj.cs new file mode 100644 index 0000000000..58628942cd --- /dev/null +++ b/src/SIL.Converters.Usj/UsxToUsj.cs @@ -0,0 +1,178 @@ +using System.Collections; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Xml; + +namespace SIL.Converters.Usj +{ + /// + /// Convert Scripture from USX to USJ. + /// + /// + /// Ported to C# from this file: + /// https://github.com/BiblioNexus-Foundation/scripture-editors/blob/main/packages/utilities/src/converters/usj/usx-to-usj.ts + /// + public static class UsxToUsj + { + private static (T, bool) UsxDomToUsjRecurse(XmlElement usxElement) + where T : UsjBase, new() + { + string type = usxElement.Name; + string text = null; + bool append = true; + + // A row or cell type must be prefixed by "table:" + if (type == "row" || type == "cell") + { + type = "table:" + type; + } + + // Convert the XML attributes to a dictionary of strings + Dictionary attributes = usxElement + .Attributes.Cast() + .ToDictionary(attrib => attrib.Name, attrib => attrib.Value); + + // If style is present, make that the market + if (attributes.TryGetValue("style", out string marker)) + { + attributes.Remove("style"); + } + + // Dropping because presence of vid in para elements is not consistent in USX + if (attributes.ContainsKey("vid")) + { + attributes.Remove("vid"); + } + + // Dropping because it is nonstandard derived metadata that could get out of date + if (attributes.ContainsKey("status")) + { + attributes.Remove("status"); + } + + var outObj = new T { Type = type }; + if (marker != null && outObj is UsjMarker usjMarker1) + { + usjMarker1.Marker = marker; + } + + // Set the attributes, placing unknown attributes in the Json Extension Data + // This code implements the Typescript code: outObj = { ...outObj, ...attributes }; + foreach (KeyValuePair attrib in attributes) + { + PropertyInfo property = outObj + .GetType() + .GetProperty(attrib.Key, BindingFlags.Public | BindingFlags.Instance | BindingFlags.IgnoreCase); + if (property != null && property.CanWrite) + { + // Set the property if it exists + property.SetValue(outObj, attrib.Value); + } + else + { + // Add to the Json Extension Data if the property does not exist + outObj.AdditionalData[attrib.Key] = attrib.Value; + } + } + + if ( + usxElement.FirstChild?.NodeType == XmlNodeType.Text + && !string.IsNullOrWhiteSpace(usxElement.FirstChild.Value) + ) + { + text = usxElement.FirstChild.Value; + } + + outObj.Content = new ArrayList(); + if (!string.IsNullOrWhiteSpace(text)) + { + outObj.Content.Add(text); + } + + foreach (XmlNode childNode in usxElement.ChildNodes) + { + // We are only interested in elements + if (!(childNode is XmlElement child) || string.IsNullOrWhiteSpace(child.Name)) + { + continue; + } + + (UsjMarker childDict, bool appendChild) = UsxDomToUsjRecurse(child); + + if (appendChild) + { + outObj.Content.Add(childDict); + } + + if ( + child.NextSibling?.NodeType == XmlNodeType.Text + || (child.NextSibling?.NodeType == XmlNodeType.Whitespace && child.NextSibling.Value == " ") + ) + { + outObj.Content.Add(child.NextSibling.Value); + } + } + + if (outObj.Content.Count == 0 && outObj.Type != Usx.UsxType) + { + outObj.Content = null; + } + + if (outObj is UsjMarker usjMarker2 && usjMarker2.Eid != null && (type == "verse" || type == "chapter")) + { + // Ignore + append = false; + } + + return (outObj, append); + } + + private static Usj UsxDomToUsj(XmlElement usxDom) + { + Usj outputJson; + if (usxDom == null) + { + outputJson = new Usj { Content = new ArrayList() }; + } + else + { + (outputJson, _) = UsxDomToUsjRecurse(usxDom); + } + + outputJson.Type = Usj.UsjType; + outputJson.Version = Usj.UsjVersion; + return outputJson; + } + + /// + /// Converts a USX string to USJ. + /// + /// The USX string. + /// The USJ. + public static Usj UsxStringToUsj(string usx) + { + if (string.IsNullOrWhiteSpace(usx)) + { + return UsxDomToUsj(null); + } + + XmlDocument xmlDocument = new XmlDocument + { + PreserveWhitespace = true, // Whitespace inside nodes is important + }; + xmlDocument.LoadXml(usx); + return UsxDomToUsj(xmlDocument.DocumentElement); + } + + /// + /// Converts a USX Xml Document to USJ. + /// + /// The XML document. + /// The USJ. + /// + /// The should have set to true. + /// + public static Usj UsxXmlDocumentToUsj(XmlDocument xmlDocument) => UsxDomToUsj(xmlDocument?.DocumentElement); + } +} diff --git a/test/SIL.Converters.Usj.Tests/SIL.Converters.Usj.Tests.csproj b/test/SIL.Converters.Usj.Tests/SIL.Converters.Usj.Tests.csproj new file mode 100644 index 0000000000..c0ea86703f --- /dev/null +++ b/test/SIL.Converters.Usj.Tests/SIL.Converters.Usj.Tests.csproj @@ -0,0 +1,27 @@ + + + + net8.0 + enable + + false + true + + + + + + + + all + runtime; build; native; contentfiles; analyzers + + + + + + + + + + diff --git a/test/SIL.Converters.Usj.Tests/TestData.cs b/test/SIL.Converters.Usj.Tests/TestData.cs new file mode 100644 index 0000000000..7b196b928d --- /dev/null +++ b/test/SIL.Converters.Usj.Tests/TestData.cs @@ -0,0 +1,358 @@ +using System.Text.RegularExpressions; +using Newtonsoft.Json; + +namespace SIL.Converters.Usj.Tests; + +/// +/// Test data for USJ and USX conversion. +/// +/// +/// Converted to C# from: +/// https://github.com/BiblioNexus-Foundation/scripture-editors/blob/main/packages/utilities/src/converters/usj/converter-test.data.ts +/// +public static partial class TestData +{ + private const string IDEOGRAPHIC_SPACE = "\u3000"; + private const string THIN_SPACE = "\u2009"; + + [GeneratedRegex(@"(?!>)[ \r\n\t]{2,}(?=<)", RegexOptions.Compiled)] + private static partial Regex InterElementWhiteSpace(); + + [GeneratedRegex(@"(?!>)[ \r\n\t]+(?=<\/usx>)", RegexOptions.Compiled)] + private static partial Regex LastElementWhiteSpace(); + + public static string RemoveXmlWhiteSpace(string xml) + { + // C# adds whitespace to self-closing elements, so we do not remove it + xml = InterElementWhiteSpace().Replace(xml, string.Empty); + xml = LastElementWhiteSpace().Replace(xml, string.Empty); + return xml.Trim(); + } + + /// + /// Genesis 1:1 in USJ as a JSON string. + /// + public const string JsonGen1V1 = $$""" + { + type: "{{Usj.UsjType}}", + version: "{{Usj.UsjVersion}}", + content: [ + { type: "book", marker: "id", code: "GEN", content: ["Some Scripture Version"] }, + { type: "chapter", marker: "c", number: "1", sid: "GEN 1", pubnumber: "I" }, + { + type: "para", + marker: "p", + content: [ + { type: "verse", marker: "v", number: "1", sid: "GEN 1:1" }, + "the first verse ", + { type: "verse", marker: "v", number: "2", sid: "GEN 1:2" }, + "the second verse ", + { type: "verse", marker: "v", number: "15", altnumber: "3", sid: "GEN 1:15" }, + "Tell the Israelites that I, the ", + { type: "char", marker: "nd", content: ["Lord"] }, + ", the God of their ancestors, the God of Abraham, Isaac, and Jacob,", + { type: "char", marker: "va", content: ["4"] }, + ], + }, + { type: "para", marker: "b" }, + { + type: "para", + marker: "q2", + content: [ + { type: "verse", marker: "v", number: "16", sid: "GEN 1:16" }, + "“There is no help for him in God.”", + { + type: "note", + marker: "f", + caller: "+", + category: "TN", + content: [ + { type: "char", marker: "fr", content: ["3:2 "] }, + { + type: "char", + marker: "ft", + content: ["The Hebrew word rendered “God” is “אֱלֹהִ֑ים” (Elohim)."], + }, + ], + }, + " ", + { type: "unmatched", marker: "f*" }, + " ", + { type: "char", marker: "qs", content: ["Selah."] }, + ], + }, + ], + } + """; + + /// + /// An empty USX document. + /// + public const string UsxEmpty = $""""""; + + /// + /// An empty USJ object. + /// + public static readonly Usj UsjEmpty = JsonConvert.DeserializeObject( + $$"""{ type: "{{Usj.UsjType}}", version: "{{Usj.UsjVersion}}", content: [] }""" + )!; + + /// + /// Genesis 1:1 in USX (with other test data). + /// + /// + /// Reformatted from: + /// https://github.com/mvh-solutions/nice-usfm-json/blob/main/samples/character/origin.xml + /// Additional test features: + /// - Whitespace for all self-closing element endings. + /// - Reorder attributes to match UsxUsfmParserSink output. + /// + public const string UsxGen1V1 = $""" + + Some Scripture Version + + + the first verse + the second verse + Tell the Israelites that I, the Lord, the God of their ancestors, the God of Abraham, Isaac, and Jacob,4 + + + “There is no help for him in God.”3:2 The Hebrew word rendered “God” is “אֱלֹהִ֑ים” (Elohim). Selah. + + + """; + + /// + /// Genesis 1:1 in USJ (with other test data). + /// + /// + /// Modified from: + /// https://github.com/mvh-solutions/nice-usfm-json/blob/main/samples/character/proposed.json + /// Additional test features: + /// - Preserve significant whitespace at the beginning or end of text + /// - Preserve significant whitespace between elements + /// - Reorder attributes to match UsxUsfmParserSink output. + /// + public static readonly Usj UsjGen1V1 = JsonConvert.DeserializeObject(JsonGen1V1)!; + + /// + /// Tests a chapter with an implied paragraph. + /// + /// Attributes are reordered to match UsxUsfmParserSink output. + public const string UsxGen1V1ImpliedPara = $""" + + + + the first verse + the second verse + Tell the Israelites that I, the Lord, the God of their ancestors, the God of Abraham, Isaac, and Jacob, + + + """; + + public static readonly Usj UsjGen1V1ImpliedPara = JsonConvert.DeserializeObject( + $$""" + { + type: "{{Usj.UsjType}}", + version: "{{Usj.UsjVersion}}", + content: [ + { type: "book", marker: "id", code: "GEN" }, + { type: "chapter", marker: "c", number: "1", sid: "GEN 1" }, + { type: "verse", marker: "v", number: "1", sid: "GEN 1:1" }, + "the first verse ", + { type: "verse", marker: "v", number: "2", sid: "GEN 1:2" }, + "the second verse ", + { type: "verse", marker: "v", number: "15", sid: "GEN 1:15" }, + "Tell the Israelites that I, the ", + { type: "char", marker: "nd", content: ["Lord"] }, + ", the God of their ancestors, the God of Abraham, Isaac, and Jacob,", + ], + } + """ + )!; + + /// + /// Includes various nonstandard features we want to support in the + /// spirit of generously supporting user data. + /// + /// Additional test features: + /// - Preserve contents of `ca` even though it seems possible `ca` should not occur as its own marker + /// - Preserve non-standard contents of `b` marker that should not have contents + /// - Preserve closed attribute on character marker + /// - Reorder attributes to match UsxUsfmParserSink output. + /// + public const string UsxGen1V1Nonstandard = $""" + + Some Scripture Version + + + the first verse + the second verse 4 + + This should not be here + + + """; + + public static readonly Usj UsjGen1V1Nonstandard = JsonConvert.DeserializeObject( + $$""" + { + type: "{{Usj.UsjType}}", + version: "{{Usj.UsjVersion}}", + content: [ + { type: "book", marker: "id", code: "GEN", content: ["Some Scripture Version"] }, + { type: "chapter", marker: "c", number: "1", sid: "GEN 1" }, + { + type: "para", + marker: "p", + content: [ + { type: "verse", marker: "v", number: "1", sid: "GEN 1:1" }, + "the ", + { + type: "char", + marker: "nd", + closed: "false", + content: [ + "first verse ", + { type: "verse", marker: "v", number: "2", sid: "GEN 1:2" }, + "the second verse ", + { type: "char", marker: "ca", content: ["4"] }, + ], + }, + ], + }, + { type: "para", marker: "b", content: ["This should not be here"] }, + ], + } + """ + )!; + + /// + /// Tests removing structural whitespace (see https://docs.usfm.bible/usfm/latest/whitespace.html) in + /// USX while preserving content whitespace. + /// + /// Includes various strange whitespace quirks that Paratext supports. + /// + /// For example, Paratext's UsfmToken.RegularizeSpaces does not deduplicate U+3000 (IDEOGRAPHIC SPACE) + /// after other whitespace and does not deduplicate other whitespace after U+3000 (IDEOGRAPHIC SPACE). + /// However, it does deduplicate multiple U+3000 (IDEOGRAPHIC SPACE) in a row. + /// + /// TODO: Also test ZWSP and its quirks. + /// TODO: Especially concerning is that the editor inserts a bunch of ZWSP in many places in the editable state. + /// + /// Attributes are reordered to match UsxUsfmParserSink output. + public const string UsxGen1V1Whitespace = $""" + + + + space between each{IDEOGRAPHIC_SPACE}word should{THIN_SPACE}{IDEOGRAPHIC_SPACE} stay + + + """; + + public static readonly Usj UsjGen1V1Whitespace = JsonConvert.DeserializeObject( + $$""" + { + type: "{{Usj.UsjType}}", + version: "{{Usj.UsjVersion}}", + content: [ + { type: "book", marker: "id", code: "GEN" }, + { type: "chapter", marker: "c", number: "1", sid: "GEN 1" }, + { type: "verse", marker: "v", number: "1", sid: "GEN 1:1" }, + { type: "char", marker: "nd", content: ["space"] }, + " ", + { type: "char", marker: "wj", content: ["between"] }, + " ", + { type: "char", marker: "nd", content: ["each"] }, + "{{IDEOGRAPHIC_SPACE}}", + { type: "char", marker: "wj", content: ["word"] }, + " ", + { type: "char", marker: "nd", content: ["should"] }, + "{{THIN_SPACE}}{{IDEOGRAPHIC_SPACE}} ", + { type: "char", marker: "wj", content: ["stay"] }, + ], + } + """ + )!; + + /// + /// Genesis 1:1 in USX (with attributes to remove). + /// + /// + /// This is a version of with attributes that will be removed. + /// If round-tripping, compare the final USX to . + /// + public const string UsxGen1V1WithAttributesToRemove = $""" + + Some Scripture Version + + + the first verse + the second verse + Tell the Israelites that I, the Lord, the God of their ancestors, the God of Abraham, Isaac, and Jacob,4 + + + “There is no help for him in God.”3:2 The Hebrew word rendered “God” is “אֱלֹהִ֑ים” (Elohim). Selah. + + + """; + + /// + /// Genesis 1:1 in USX (with a table). + /// + public const string UsxGen1V1WithTable = $""" + + Some Scripture Version + + + + Tribe + Leader + Number + +
+ + the first verse + + +
+ """; + + /// + /// Genesis 1:1 in USJ (with a table). + /// + public static readonly Usj UsjGen1V1WithTable = JsonConvert.DeserializeObject( + $$""" + { + type: "{{Usj.UsjType}}", + version: "{{Usj.UsjVersion}}", + content: [ + { type: "book", marker: "id", code: "GEN", content: ["Some Scripture Version"] }, + { type: "chapter", marker: "c", number: "1", sid: "GEN 1" }, + { + type: "table", + content: [ + { + type: "table:row", + marker: "tr", + content: [ + { type: "table:cell", marker: "th1", align: "start", content: ["Tribe "] }, + { type: "table:cell", marker: "th2", align: "start", content: ["Leader "] }, + { type: "table:cell", marker: "th3", align: "start", content: ["Number "] }, + ], + }, + ], + }, + { + type: "para", + marker: "p", + content: [ + { type: "verse", marker: "v", number: "1", sid: "GEN 1:1" }, + "the first verse ", + ], + }, + ], + } + """ + )!; +} diff --git a/test/SIL.Converters.Usj.Tests/UsjContentConverterTests.cs b/test/SIL.Converters.Usj.Tests/UsjContentConverterTests.cs new file mode 100644 index 0000000000..c678222700 --- /dev/null +++ b/test/SIL.Converters.Usj.Tests/UsjContentConverterTests.cs @@ -0,0 +1,22 @@ +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; +using NUnit.Framework; + +namespace SIL.Converters.Usj.Tests; + +[TestFixture] +public class UsjContentConverterTests +{ + [Test] + public void ShouldReadAndWriteJson() + { + Usj? usj = JsonConvert.DeserializeObject(TestData.JsonGen1V1, new UsjContentConverter()); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1).UsingPropertiesComparer()); + string json = JsonConvert.SerializeObject(usj, new UsjContentConverter()); + + // The properties will be out of order, so we need to parse the JSON and compare the JTokens. + JToken actual = JToken.Parse(json); + JToken expected = JToken.Parse(TestData.JsonGen1V1); + Assert.That(JToken.DeepEquals(actual, expected), Is.True); + } +} diff --git a/test/SIL.Converters.Usj.Tests/UsjToUsxTests.cs b/test/SIL.Converters.Usj.Tests/UsjToUsxTests.cs new file mode 100644 index 0000000000..b45f3f06e8 --- /dev/null +++ b/test/SIL.Converters.Usj.Tests/UsjToUsxTests.cs @@ -0,0 +1,77 @@ +using System; +using NUnit.Framework; + +namespace SIL.Converters.Usj.Tests; + +[TestFixture] +public class UsjToUsxTests +{ + [Test] + public void ShouldConvertFromEmptyUsjToUsx() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjEmpty); + Assert.That(usx, Is.EqualTo(TestData.UsxEmpty)); + } + + [Test] + public void ShouldConvertFromUsjToUsx() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1))); + } + + [Test] + public void ShouldConvertFromUsjToUsxAndBack() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1); + Usj usj = UsxToUsj.UsxStringToUsj(usx); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsjWithImpliedParagraphsToUsx() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1ImpliedPara); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1ImpliedPara))); + } + + [Test] + public void ShouldConvertFromUsjWithNonStandardFeaturesToUsx() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1Nonstandard); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1Nonstandard))); + } + + [Test] + public void ShouldConvertFromUsjToUsxWithSpecialWhiteSpace() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1Whitespace); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1Whitespace))); + } + + [Test] + public void ShouldConvertFromUsjToUsxWithTable() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1WithTable); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1WithTable))); + } + + [Test] + public void ShouldNotAllowInvalidContent() + { + var usjToUsx = new UsjToUsx(); + Assert.Throws(() => usjToUsx.UsjToUsxString(new Usj { Content = [new Usj()] })); + } +} diff --git a/test/SIL.Converters.Usj.Tests/UsxToUsjTests.cs b/test/SIL.Converters.Usj.Tests/UsxToUsjTests.cs new file mode 100644 index 0000000000..c68872ed1f --- /dev/null +++ b/test/SIL.Converters.Usj.Tests/UsxToUsjTests.cs @@ -0,0 +1,80 @@ +using System.Xml; +using NUnit.Framework; + +namespace SIL.Converters.Usj.Tests; + +[TestFixture] +public class UsxToUsjTests +{ + [Test] + public void ShouldConvertFromEmptyUsxToUsj() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxEmpty); + Assert.That(usj, Is.EqualTo(TestData.UsjEmpty).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromNullToUsj() + { + Usj usj = UsxToUsj.UsxStringToUsj(null); + Assert.That(usj, Is.EqualTo(TestData.UsjEmpty).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromNullXmlDocumentToUsj() + { + Usj usj = UsxToUsj.UsxXmlDocumentToUsj(null); + Assert.That(usj, Is.EqualTo(TestData.UsjEmpty).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsxToUsj() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsxToUsjAndRemoveSpecificAttributes() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1WithAttributesToRemove); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsxToUsjWithImpliedParagraphs() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1ImpliedPara); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1ImpliedPara).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsxToUsjWithSpecialWhiteSpace() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1Whitespace); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1Whitespace).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsxToUsjWithNonStandardFeatures() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1Nonstandard); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1Nonstandard).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsxToUsjWithTable() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1WithTable); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1WithTable).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromXmlDocumentToUsj() + { + XmlDocument document = new XmlDocument { PreserveWhitespace = true }; + document.LoadXml(TestData.UsxGen1V1); + Usj usj = UsxToUsj.UsxXmlDocumentToUsj(document); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1).UsingPropertiesComparer()); + } +} diff --git a/xForge.sln b/xForge.sln index 6d5ea6a096..7aa794a6bb 100644 --- a/xForge.sln +++ b/xForge.sln @@ -27,6 +27,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UpdateHelp", "src\Help\Upda EndProject Project("{E53339B2-1760-4266-BCC7-CA923CBCF16C}") = "Docker", "src\Docker\Docker.dcproj", "{FF8C61CF-091F-43A9-A8DE-336C6021D543}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SIL.Converters.Usj", "src\SIL.Converters.Usj\SIL.Converters.Usj.csproj", "{1A391C63-C9DB-4C50-A83D-36BD5FF31C63}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SIL.Converters.Usj.Tests", "test\SIL.Converters.Usj.Tests\SIL.Converters.Usj.Tests.csproj", "{165B1F82-ECB0-4E08-99A7-26359CC1A203}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -57,6 +61,14 @@ Global {FF8C61CF-091F-43A9-A8DE-336C6021D543}.Debug|Any CPU.Build.0 = Debug|Any CPU {FF8C61CF-091F-43A9-A8DE-336C6021D543}.Release|Any CPU.ActiveCfg = Release|Any CPU {FF8C61CF-091F-43A9-A8DE-336C6021D543}.Release|Any CPU.Build.0 = Release|Any CPU + {1A391C63-C9DB-4C50-A83D-36BD5FF31C63}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1A391C63-C9DB-4C50-A83D-36BD5FF31C63}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1A391C63-C9DB-4C50-A83D-36BD5FF31C63}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1A391C63-C9DB-4C50-A83D-36BD5FF31C63}.Release|Any CPU.Build.0 = Release|Any CPU + {165B1F82-ECB0-4E08-99A7-26359CC1A203}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {165B1F82-ECB0-4E08-99A7-26359CC1A203}.Debug|Any CPU.Build.0 = Debug|Any CPU + {165B1F82-ECB0-4E08-99A7-26359CC1A203}.Release|Any CPU.ActiveCfg = Release|Any CPU + {165B1F82-ECB0-4E08-99A7-26359CC1A203}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -69,6 +81,8 @@ Global {15D690DC-B5C1-4612-BA9F-F0A1AC067ADC} = {FAE8A070-D147-4FEB-B263-14C0BF50DEAE} {9C77764A-F195-4D45-98C3-80A7782A3A2D} = {15D690DC-B5C1-4612-BA9F-F0A1AC067ADC} {FF8C61CF-091F-43A9-A8DE-336C6021D543} = {FAE8A070-D147-4FEB-B263-14C0BF50DEAE} + {1A391C63-C9DB-4C50-A83D-36BD5FF31C63} = {FAE8A070-D147-4FEB-B263-14C0BF50DEAE} + {165B1F82-ECB0-4E08-99A7-26359CC1A203} = {0584AC24-20DC-4BEB-B537-2DB709E43292} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {7F3BD410-097C-457C-9FDF-547598955EAD} From 744fe3ae63d9ef5deb0832ff3cd9b9e1c04ddf4c Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Wed, 29 Jan 2025 08:43:59 +1300 Subject: [PATCH 2/4] SF-3184 Add USJ support to the get pre-translations API --- .../Controllers/MachineApiController.cs | 56 +++++++++ src/SIL.XForge.Scripture/Models/MachineApi.cs | 2 + .../SIL.XForge.Scripture.csproj | 1 + .../Services/IMachineApiService.cs | 8 ++ .../Services/MachineApiService.cs | 37 ++++++ .../Controllers/MachineApiControllerTests.cs | 118 ++++++++++++++++++ .../Services/MachineApiServiceTests.cs | 73 +++++++++++ 7 files changed, 295 insertions(+) diff --git a/src/SIL.XForge.Scripture/Controllers/MachineApiController.cs b/src/SIL.XForge.Scripture/Controllers/MachineApiController.cs index 9d3448f3c6..cf5682a522 100644 --- a/src/SIL.XForge.Scripture/Controllers/MachineApiController.cs +++ b/src/SIL.XForge.Scripture/Controllers/MachineApiController.cs @@ -7,6 +7,7 @@ using Microsoft.AspNetCore.Mvc; using Polly.CircuitBreaker; using Serval.Client; +using SIL.Converters.Usj; using SIL.XForge.Models; using SIL.XForge.Realtime; using SIL.XForge.Scripture.Models; @@ -427,6 +428,61 @@ CancellationToken cancellationToken } } + /// + /// Gets the pre-translations for the specified chapter as USJ. + /// + /// The Scripture Forge project identifier. + /// The book number. + /// The chapter number. If zero, the entire book is returned. + /// The cancellation token. + /// The pre-translations were successfully queried for. + /// You do not have permission to retrieve the pre-translations for this project. + /// The project does not exist or is not configured on the ML server. + /// Retrieving the pre-translations in this format is not supported. + /// The engine has not been built on the ML server. + /// The ML server is temporarily unavailable or unresponsive. + [HttpGet(MachineApi.GetPreTranslationUsj)] + public async Task> GetPreTranslationUsjAsync( + string sfProjectId, + int bookNum, + int chapterNum, + CancellationToken cancellationToken + ) + { + try + { + Usj usj = await _machineApiService.GetPreTranslationUsjAsync( + _userAccessor.UserId, + sfProjectId, + bookNum, + chapterNum, + cancellationToken + ); + return Ok(usj); + } + catch (BrokenCircuitException e) + { + _exceptionHandler.ReportException(e); + return StatusCode(StatusCodes.Status503ServiceUnavailable, MachineApiUnavailable); + } + catch (DataNotFoundException) + { + return NotFound(); + } + catch (ForbiddenException) + { + return Forbid(); + } + catch (InvalidOperationException) + { + return Conflict(); + } + catch (NotSupportedException) + { + return new StatusCodeResult(StatusCodes.Status405MethodNotAllowed); + } + } + /// /// Gets the pre-translations for the specified chapter as USX. /// diff --git a/src/SIL.XForge.Scripture/Models/MachineApi.cs b/src/SIL.XForge.Scripture/Models/MachineApi.cs index cc76baa421..26f171363d 100644 --- a/src/SIL.XForge.Scripture/Models/MachineApi.cs +++ b/src/SIL.XForge.Scripture/Models/MachineApi.cs @@ -27,6 +27,8 @@ public static class MachineApi "translation/engines/project:{sfProjectId}/actions/preTranslate/{bookNum}_{chapterNum}/delta"; public const string GetPreTranslationUsfm = "translation/engines/project:{sfProjectId}/actions/preTranslate/{bookNum}_{chapterNum}/usfm"; + public const string GetPreTranslationUsj = + "translation/engines/project:{sfProjectId}/actions/preTranslate/{bookNum}_{chapterNum}/usj"; public const string GetPreTranslationUsx = "translation/engines/project:{sfProjectId}/actions/preTranslate/{bookNum}_{chapterNum}/usx"; public const string GetLastCompletedPreTranslationBuild = diff --git a/src/SIL.XForge.Scripture/SIL.XForge.Scripture.csproj b/src/SIL.XForge.Scripture/SIL.XForge.Scripture.csproj index f50af68bbc..73b7c8be61 100644 --- a/src/SIL.XForge.Scripture/SIL.XForge.Scripture.csproj +++ b/src/SIL.XForge.Scripture/SIL.XForge.Scripture.csproj @@ -62,6 +62,7 @@ + diff --git a/src/SIL.XForge.Scripture/Services/IMachineApiService.cs b/src/SIL.XForge.Scripture/Services/IMachineApiService.cs index 7572ab8846..77a0b906ec 100644 --- a/src/SIL.XForge.Scripture/Services/IMachineApiService.cs +++ b/src/SIL.XForge.Scripture/Services/IMachineApiService.cs @@ -2,6 +2,7 @@ using System.Threading.Tasks; using Autofac.Extras.DynamicProxy; using Serval.Client; +using SIL.Converters.Usj; using SIL.XForge.EventMetrics; using SIL.XForge.Realtime; using SIL.XForge.Scripture.Models; @@ -67,6 +68,13 @@ Task GetPreTranslationUsfmAsync( bool isServalAdmin, CancellationToken cancellationToken ); + Task GetPreTranslationUsjAsync( + string curUserId, + string sfProjectId, + int bookNum, + int chapterNum, + CancellationToken cancellationToken + ); Task GetPreTranslationUsxAsync( string curUserId, string sfProjectId, diff --git a/src/SIL.XForge.Scripture/Services/MachineApiService.cs b/src/SIL.XForge.Scripture/Services/MachineApiService.cs index e95f21282d..53e33531e8 100644 --- a/src/SIL.XForge.Scripture/Services/MachineApiService.cs +++ b/src/SIL.XForge.Scripture/Services/MachineApiService.cs @@ -11,6 +11,7 @@ using Microsoft.Extensions.Options; using Newtonsoft.Json; using Serval.Client; +using SIL.Converters.Usj; using SIL.ObjectModel; using SIL.XForge.Configuration; using SIL.XForge.DataAccess; @@ -558,6 +559,42 @@ CancellationToken cancellationToken } } + public async Task GetPreTranslationUsjAsync( + string curUserId, + string sfProjectId, + int bookNum, + int chapterNum, + CancellationToken cancellationToken + ) + { + // Ensure that the user has permission + SFProject project = await EnsureProjectPermissionAsync(curUserId, sfProjectId); + + // Retrieve the user secret + Attempt attempt = await userSecrets.TryGetAsync(curUserId); + if (!attempt.TryResult(out UserSecret userSecret)) + { + throw new DataNotFoundException("The user does not exist."); + } + + try + { + string usfm = await preTranslationService.GetPreTranslationUsfmAsync( + sfProjectId, + bookNum, + chapterNum, + cancellationToken + ); + string usx = paratextService.GetBookText(userSecret, project.ParatextId, bookNum, usfm); + return UsxToUsj.UsxStringToUsj(usx); + } + catch (ServalApiException e) + { + ProcessServalApiException(e); + throw; + } + } + public async Task GetPreTranslationUsxAsync( string curUserId, string sfProjectId, diff --git a/test/SIL.XForge.Scripture.Tests/Controllers/MachineApiControllerTests.cs b/test/SIL.XForge.Scripture.Tests/Controllers/MachineApiControllerTests.cs index 03de4cc6d5..63f780e51a 100644 --- a/test/SIL.XForge.Scripture.Tests/Controllers/MachineApiControllerTests.cs +++ b/test/SIL.XForge.Scripture.Tests/Controllers/MachineApiControllerTests.cs @@ -9,6 +9,7 @@ using NUnit.Framework; using Polly.CircuitBreaker; using Serval.Client; +using SIL.Converters.Usj; using SIL.XForge.Models; using SIL.XForge.Realtime; using SIL.XForge.Scripture.Models; @@ -1045,6 +1046,123 @@ await env .GetPreTranslationUsfmAsync(User01, Project01, 40, 1, false, CancellationToken.None); } + [Test] + public async Task GetPreTranslationUsjAsync_MachineApiDown() + { + // Set up test environment + var env = new TestEnvironment(); + env.MachineApiService.GetPreTranslationUsjAsync(User01, Project01, 40, 1, CancellationToken.None) + .Throws(new BrokenCircuitException()); + + // SUT + ActionResult actual = await env.Controller.GetPreTranslationUsjAsync( + Project01, + 40, + 1, + CancellationToken.None + ); + + env.ExceptionHandler.Received(1).ReportException(Arg.Any()); + Assert.IsInstanceOf(actual.Result); + Assert.AreEqual(StatusCodes.Status503ServiceUnavailable, (actual.Result as ObjectResult)?.StatusCode); + } + + [Test] + public async Task GetPreTranslationUsjAsync_NoPermission() + { + // Set up test environment + var env = new TestEnvironment(); + env.MachineApiService.GetPreTranslationUsjAsync(User01, Project01, 40, 1, CancellationToken.None) + .Throws(new ForbiddenException()); + + // SUT + ActionResult actual = await env.Controller.GetPreTranslationUsjAsync( + Project01, + 40, + 1, + CancellationToken.None + ); + + Assert.IsInstanceOf(actual.Result); + } + + [Test] + public async Task GetPreTranslationUsjAsync_NoProject() + { + // Set up test environment + var env = new TestEnvironment(); + env.MachineApiService.GetPreTranslationUsjAsync(User01, Project01, 40, 1, CancellationToken.None) + .Throws(new DataNotFoundException(string.Empty)); + + // SUT + ActionResult actual = await env.Controller.GetPreTranslationUsjAsync( + Project01, + 40, + 1, + CancellationToken.None + ); + + Assert.IsInstanceOf(actual.Result); + } + + [Test] + public async Task GetPreTranslationUsjAsync_NotBuilt() + { + // Set up test environment + var env = new TestEnvironment(); + env.MachineApiService.GetPreTranslationUsjAsync(User01, Project01, 40, 1, CancellationToken.None) + .Throws(new InvalidOperationException()); + + // SUT + ActionResult actual = await env.Controller.GetPreTranslationUsjAsync( + Project01, + 40, + 1, + CancellationToken.None + ); + + Assert.IsInstanceOf(actual.Result); + } + + [Test] + public async Task GetPreTranslationUsjAsync_NotSupported() + { + // Set up test environment + var env = new TestEnvironment(); + env.MachineApiService.GetPreTranslationUsjAsync(User01, Project01, 40, 1, CancellationToken.None) + .Throws(new NotSupportedException()); + + // SUT + ActionResult actual = await env.Controller.GetPreTranslationUsjAsync( + Project01, + 40, + 1, + CancellationToken.None + ); + + Assert.IsInstanceOf(actual.Result); + Assert.AreEqual(StatusCodes.Status405MethodNotAllowed, (actual.Result as IStatusCodeActionResult)?.StatusCode); + } + + [Test] + public async Task GetPreTranslationUsjAsync_Success() + { + // Set up test environment + var env = new TestEnvironment(); + env.MachineApiService.GetPreTranslationUsjAsync(User01, Project01, 40, 1, CancellationToken.None) + .Returns(Task.FromResult(new Usj())); + + // SUT + ActionResult actual = await env.Controller.GetPreTranslationUsjAsync( + Project01, + 40, + 1, + CancellationToken.None + ); + + Assert.IsInstanceOf(actual.Result); + } + [Test] public async Task GetPreTranslationUsxAsync_MachineApiDown() { diff --git a/test/SIL.XForge.Scripture.Tests/Services/MachineApiServiceTests.cs b/test/SIL.XForge.Scripture.Tests/Services/MachineApiServiceTests.cs index f022dd2a23..e2d09a3eac 100644 --- a/test/SIL.XForge.Scripture.Tests/Services/MachineApiServiceTests.cs +++ b/test/SIL.XForge.Scripture.Tests/Services/MachineApiServiceTests.cs @@ -14,6 +14,7 @@ using NUnit.Framework; using Polly.CircuitBreaker; using Serval.Client; +using SIL.Converters.Usj; using SIL.XForge.DataAccess; using SIL.XForge.Models; using SIL.XForge.Realtime; @@ -1263,6 +1264,78 @@ public async Task GetPreTranslationUsfmAsync_Success() Assert.AreEqual(expected, usfm); } + [Test] + public void GetPreTranslationUsjAsync_CorpusDoesNotSupportUsfm() + { + // Set up test environment + var env = new TestEnvironment(); + env.PreTranslationService.GetPreTranslationUsfmAsync(Project01, 40, 1, CancellationToken.None) + .Throws(ServalApiExceptions.InvalidCorpus); + + // SUT + Assert.ThrowsAsync( + () => env.Service.GetPreTranslationUsjAsync(User01, Project01, 40, 1, CancellationToken.None) + ); + } + + [Test] + public async Task GetPreTranslationUsjAsync_MissingUserSecret() + { + // Set up test environment + var env = new TestEnvironment(); + await env.UserSecrets.DeleteAllAsync(_ => true); + + // SUT + Assert.ThrowsAsync( + () => env.Service.GetPreTranslationUsjAsync(User01, Project01, 40, 1, CancellationToken.None) + ); + } + + [Test] + public async Task GetPreTranslationUsjAsync_Success() + { + // Set up test environment + var env = new TestEnvironment(); + const string usfm = "\\c 1 \\v1 Verse 1"; + const string usx = + "" + + "Verse 1"; + Usj expected = new Usj + { + Type = Usj.UsjType, + Version = Usj.UsjVersion, + Content = + [ + new UsjMarker + { + Type = "book", + Marker = "id", + Code = "MAT", + }, + new UsjMarker + { + Type = "chapter", + Marker = "c", + Number = "1", + }, + new UsjMarker + { + Type = "verse", + Marker = "v", + Number = "1", + }, + "Verse 1", + ], + }; + env.PreTranslationService.GetPreTranslationUsfmAsync(Project01, 40, 1, CancellationToken.None) + .Returns(Task.FromResult(usfm)); + env.ParatextService.GetBookText(Arg.Any(), Arg.Any(), 40, usfm).Returns(usx); + + // SUT + Usj actual = await env.Service.GetPreTranslationUsjAsync(User01, Project01, 40, 1, CancellationToken.None); + Assert.That(actual, Is.EqualTo(expected).UsingPropertiesComparer()); + } + [Test] public void GetPreTranslationUsxAsync_CorpusDoesNotSupportUsfm() { From 7a830876ad72851b827d4f7f83257567b2484b98 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Wed, 5 Feb 2025 14:31:01 +1300 Subject: [PATCH 3/4] Code review fixes --- src/SIL.Converters.Usj/Usj.cs | 4 +- src/SIL.Converters.Usj/UsjBase.cs | 6 +- src/SIL.Converters.Usj/UsjMarker.cs | 11 +- src/SIL.Converters.Usj/UsjToUsx.cs | 51 ++++- src/SIL.Converters.Usj/Usx.cs | 4 +- src/SIL.Converters.Usj/UsxToUsj.cs | 21 +- test/SIL.Converters.Usj.Tests/TestData.cs | 193 ++++++++++++++---- .../SIL.Converters.Usj.Tests/UsjToUsxTests.cs | 124 ++++++++++- .../SIL.Converters.Usj.Tests/UsxToUsjTests.cs | 145 ++++++++++++- 9 files changed, 495 insertions(+), 64 deletions(-) diff --git a/src/SIL.Converters.Usj/Usj.cs b/src/SIL.Converters.Usj/Usj.cs index 719bc7e997..42a34fd67a 100644 --- a/src/SIL.Converters.Usj/Usj.cs +++ b/src/SIL.Converters.Usj/Usj.cs @@ -9,12 +9,12 @@ public class Usj : UsjBase, IUsj /// /// The supported USJ spec type. /// - public const string UsjType = "USJ"; + public static readonly string UsjType = "USJ"; /// /// The supported USJ spec version. /// - public const string UsjVersion = "3.1"; + public static readonly string UsjVersion = "3.1"; /// /// The USJ spec version. diff --git a/src/SIL.Converters.Usj/UsjBase.cs b/src/SIL.Converters.Usj/UsjBase.cs index d5687850d7..a40c76d1d8 100644 --- a/src/SIL.Converters.Usj/UsjBase.cs +++ b/src/SIL.Converters.Usj/UsjBase.cs @@ -22,7 +22,11 @@ public abstract class UsjBase /// The JSON representation of scripture contents from USFM/USX. /// /// This will either be a or . - /// Nullable. The contents will be laid out in order. + /// + /// Nullable. + /// If there are no contents, this will be null for , or empty for . + /// The contents will be laid out in order. + /// [JsonConverter(typeof(UsjContentConverter))] public ArrayList Content { get; set; } diff --git a/src/SIL.Converters.Usj/UsjMarker.cs b/src/SIL.Converters.Usj/UsjMarker.cs index a6b6486bf0..06fd597866 100644 --- a/src/SIL.Converters.Usj/UsjMarker.cs +++ b/src/SIL.Converters.Usj/UsjMarker.cs @@ -12,13 +12,14 @@ public class UsjMarker : UsjBase public string Marker { get; set; } /// - /// Indicates the Book-chapter-verse value in the paragraph based structure. + /// The milestone start ID, which indicates the Book-chapter-verse value in the paragraph based structure. /// /// Nullable. public string Sid { get; set; } /// - /// Milestone end ID, matches start ID (not currently included in USJ spec). + /// Milestone end ID, which matches the milestone start ID . + /// is not specified in the USJ spec, but is kept for USX compatibility. /// /// Nullable. public string Eid { get; set; } @@ -42,8 +43,12 @@ public class UsjMarker : UsjBase public string AltNumber { get; set; } /// - /// Published character of chapter or verse. + /// Published character of a chapter or verse. /// + /// + /// This can be a letter (I, II, etc.), a number (1, 2, ...), or both. + /// It is only displayed in the published version of the scripture text. + /// /// Nullable. public string PubNumber { get; set; } diff --git a/src/SIL.Converters.Usj/UsjToUsx.cs b/src/SIL.Converters.Usj/UsjToUsx.cs index 52fd915c0f..2392a0348c 100644 --- a/src/SIL.Converters.Usj/UsjToUsx.cs +++ b/src/SIL.Converters.Usj/UsjToUsx.cs @@ -61,6 +61,13 @@ private static void SetAttributes(XmlElement element, UsjMarker markerContent) foreach (KeyValuePair nonStandardAttribute in markerContent.AdditionalData) { string key = nonStandardAttribute.Key; + + // Include any non-standard attributes that are strings for compatibility with USX. + // + // Notes: + // - If the non-standard attribute is not a string, discard it as it is invalid. + // - Type and marker are handled above, so do not repeat them. + // - Content is a collection handled in ConvertUsjRecurse. if (nonStandardAttribute.Value is string value && key != "type" && key != "marker" && key != "content") { element.SetAttribute(key, value); @@ -103,10 +110,14 @@ bool isLastItem throw new ArgumentOutOfRangeException(nameof(markerContent)); } + // Store the previous verse eid so we can close them in the correct place + string lastVerseEid = null; + // Create chapter and verse end elements from SID attributes. if (_verseEid != null && (type == "verse" || (parentElement.Name == "para" && isLastItem))) { eidElement = CreateVerseEndElement(usxDoc); + lastVerseEid = _verseEid; _verseEid = null; } @@ -126,15 +137,28 @@ bool isLastItem _chapterEid = usjMarker.Sid; } - // Append to parent. + // See if we are at a new verse + if (eidElement != null && isLastItem && _verseEid != null && _verseEid != lastVerseEid) + { + // Write the eid element for the previous verse + parentElement.AppendChild(eidElement); + + // Ensure that eid element for the current verse is not written + eidElement = null; + _verseEid = null; + } + + // Append to parent to close the verse or chapter before this new node if (eidElement != null && !isLastItem) { parentElement.AppendChild(eidElement); + eidElement = null; } parentElement.AppendChild(node); - if (eidElement != null && isLastItem) + // Append the eid element as this is the last element + if (eidElement != null) { parentElement.AppendChild(eidElement); } @@ -167,18 +191,35 @@ private void UsjToUsxDom(IUsj usj, XmlDocument usxDoc) } /// - /// Converts a USJ object to a USX string. + /// Converts a USJ object to a USX . /// /// The USJ object. - /// The USX as a string. - public string UsjToUsxString(IUsj usj) + /// The XML Document. + public XmlDocument UsjToUsxXmlDocument(IUsj usj) { + // Reset any instance variables + _chapterEid = null; + _verseEid = null; + + // Create the USX document XmlDocument usxDoc = new XmlDocument { PreserveWhitespace = true }; XmlElement documentElement = usxDoc.CreateElement(Usx.UsxType); documentElement.SetAttribute("version", Usx.UsxVersion); usxDoc.AppendChild(documentElement); UsjToUsxDom(usj, usxDoc); + return usxDoc; + } + + /// + /// Converts a USJ object to a USX string. + /// + /// The USJ object. + /// The USX as a string. + public string UsjToUsxString(IUsj usj) + { + XmlDocument usxDoc = UsjToUsxXmlDocument(usj); + // Output as a string using (StringWriter stringWriter = new StringWriter()) { // These settings conform to ParatextData.UsfmToUsx diff --git a/src/SIL.Converters.Usj/Usx.cs b/src/SIL.Converters.Usj/Usx.cs index ab8036a7cd..b79c9fc646 100644 --- a/src/SIL.Converters.Usj/Usx.cs +++ b/src/SIL.Converters.Usj/Usx.cs @@ -9,11 +9,11 @@ public static class Usx /// /// The USX spec type. /// - public const string UsxType = "usx"; + public static readonly string UsxType = "usx"; /// /// The USX spec version. /// - public const string UsxVersion = "3.0"; + public static readonly string UsxVersion = "3.0"; } } diff --git a/src/SIL.Converters.Usj/UsxToUsj.cs b/src/SIL.Converters.Usj/UsxToUsj.cs index 58628942cd..ced7db931f 100644 --- a/src/SIL.Converters.Usj/UsxToUsj.cs +++ b/src/SIL.Converters.Usj/UsxToUsj.cs @@ -1,3 +1,4 @@ +using System; using System.Collections; using System.Collections.Generic; using System.Linq; @@ -33,7 +34,7 @@ private static (T, bool) UsxDomToUsjRecurse(XmlElement usxElement) .Attributes.Cast() .ToDictionary(attrib => attrib.Name, attrib => attrib.Value); - // If style is present, make that the market + // If style is present, make that the marker if (attributes.TryGetValue("style", out string marker)) { attributes.Remove("style"); @@ -105,6 +106,9 @@ private static (T, bool) UsxDomToUsjRecurse(XmlElement usxElement) outObj.Content.Add(childDict); } + // If the next sibling is text or a user inputted space (a single space), add it to the content. + // We skip whitespace nodes with more than one space because they are padding, not formatting spaces. + // Note: Any Paratext 9.5 special whitespace characters will have a node type of XmlNodeType.Text. if ( child.NextSibling?.NodeType == XmlNodeType.Text || (child.NextSibling?.NodeType == XmlNodeType.Whitespace && child.NextSibling.Value == " ") @@ -121,7 +125,7 @@ private static (T, bool) UsxDomToUsjRecurse(XmlElement usxElement) if (outObj is UsjMarker usjMarker2 && usjMarker2.Eid != null && (type == "verse" || type == "chapter")) { - // Ignore + // Omit any verse or chapter eid elements append = false; } @@ -173,6 +177,17 @@ public static Usj UsxStringToUsj(string usx) /// /// The should have set to true. /// - public static Usj UsxXmlDocumentToUsj(XmlDocument xmlDocument) => UsxDomToUsj(xmlDocument?.DocumentElement); + public static Usj UsxXmlDocumentToUsj(XmlDocument xmlDocument) + { + if (xmlDocument?.PreserveWhitespace != true) + { + throw new ArgumentException( + "The XmlDocument should have PreserveWhitespace set to true.", + nameof(xmlDocument) + ); + } + + return UsxDomToUsj(xmlDocument.DocumentElement); + } } } diff --git a/test/SIL.Converters.Usj.Tests/TestData.cs b/test/SIL.Converters.Usj.Tests/TestData.cs index 7b196b928d..59bedc7711 100644 --- a/test/SIL.Converters.Usj.Tests/TestData.cs +++ b/test/SIL.Converters.Usj.Tests/TestData.cs @@ -32,7 +32,7 @@ public static string RemoveXmlWhiteSpace(string xml) /// /// Genesis 1:1 in USJ as a JSON string. /// - public const string JsonGen1V1 = $$""" + public static readonly string JsonGen1V1 = $$""" { type: "{{Usj.UsjType}}", version: "{{Usj.UsjVersion}}", @@ -50,6 +50,8 @@ public static string RemoveXmlWhiteSpace(string xml) { type: "verse", marker: "v", number: "15", altnumber: "3", sid: "GEN 1:15" }, "Tell the Israelites that I, the ", { type: "char", marker: "nd", content: ["Lord"] }, + " ", + { type: "char", marker: "nd", content: ["God"] }, ", the God of their ancestors, the God of Abraham, Isaac, and Jacob,", { type: "char", marker: "va", content: ["4"] }, ], @@ -88,7 +90,7 @@ public static string RemoveXmlWhiteSpace(string xml) /// /// An empty USX document. /// - public const string UsxEmpty = $""""""; + public static readonly string UsxEmpty = $""""""; /// /// An empty USJ object. @@ -107,17 +109,17 @@ public static string RemoveXmlWhiteSpace(string xml) /// - Whitespace for all self-closing element endings. /// - Reorder attributes to match UsxUsfmParserSink output. /// - public const string UsxGen1V1 = $""" + public static readonly string UsxGen1V1 = $""" Some Scripture Version - - the first verse - the second verse - Tell the Israelites that I, the Lord, the God of their ancestors, the God of Abraham, Isaac, and Jacob,4 - - - “There is no help for him in God.”3:2 The Hebrew word rendered “God” is “אֱלֹהִ֑ים” (Elohim). Selah. + + the first verse + the second verse + Tell the Israelites that I, the Lord God, the God of their ancestors, the God of Abraham, Isaac, and Jacob,4 + + + “There is no help for him in God.”3:2 The Hebrew word rendered “God” is “אֱלֹהִ֑ים” (Elohim). Selah. """; @@ -139,13 +141,13 @@ public static string RemoveXmlWhiteSpace(string xml) /// Tests a chapter with an implied paragraph. /// /// Attributes are reordered to match UsxUsfmParserSink output. - public const string UsxGen1V1ImpliedPara = $""" + public static readonly string UsxGen1V1ImpliedPara = $""" - the first verse - the second verse - Tell the Israelites that I, the Lord, the God of their ancestors, the God of Abraham, Isaac, and Jacob, + the first verse + the second verse + Tell the Israelites that I, the Lord, the God of their ancestors, the God of Abraham, Isaac, and Jacob, """; @@ -178,18 +180,18 @@ public static string RemoveXmlWhiteSpace(string xml) /// Additional test features: /// - Preserve contents of `ca` even though it seems possible `ca` should not occur as its own marker /// - Preserve non-standard contents of `b` marker that should not have contents - /// - Preserve closed attribute on character marker + /// - Preserve closed attribute on character marker. This is non-standard use of a non-standard marker. /// - Reorder attributes to match UsxUsfmParserSink output. /// - public const string UsxGen1V1Nonstandard = $""" + public static readonly string UsxGen1V1Nonstandard = $""" Some Scripture Version - - the first verse - the second verse 4 - - This should not be here + + the first verse + the second verse 4 + + This should not be here """; @@ -241,11 +243,11 @@ public static string RemoveXmlWhiteSpace(string xml) /// TODO: Especially concerning is that the editor inserts a bunch of ZWSP in many places in the editable state. /// /// Attributes are reordered to match UsxUsfmParserSink output. - public const string UsxGen1V1Whitespace = $""" + public static readonly string UsxGen1V1Whitespace = $""" - space between each{IDEOGRAPHIC_SPACE}word should{THIN_SPACE}{IDEOGRAPHIC_SPACE} stay + space between each{IDEOGRAPHIC_SPACE}word should{THIN_SPACE}{IDEOGRAPHIC_SPACE} stay """; @@ -282,17 +284,17 @@ public static string RemoveXmlWhiteSpace(string xml) /// This is a version of with attributes that will be removed. /// If round-tripping, compare the final USX to . /// - public const string UsxGen1V1WithAttributesToRemove = $""" + public static readonly string UsxGen1V1WithAttributesToRemove = $""" Some Scripture Version - - the first verse - the second verse - Tell the Israelites that I, the Lord, the God of their ancestors, the God of Abraham, Isaac, and Jacob,4 - - - “There is no help for him in God.”3:2 The Hebrew word rendered “God” is “אֱלֹהִ֑ים” (Elohim). Selah. + + the first verse + the second verse + Tell the Israelites that I, the Lord God, the God of their ancestors, the God of Abraham, Isaac, and Jacob,4 + + + “There is no help for him in God.”3:2 The Hebrew word rendered “God” is “אֱלֹהִ֑ים” (Elohim). Selah. """; @@ -300,20 +302,20 @@ public static string RemoveXmlWhiteSpace(string xml) /// /// Genesis 1:1 in USX (with a table). /// - public const string UsxGen1V1WithTable = $""" + public static readonly string UsxGen1V1WithTable = $""" Some Scripture Version - - - Tribe - Leader - Number - -
- - the first verse - + + + Tribe + Leader + Number + +
+ + the first verse +
"""; @@ -355,4 +357,111 @@ public static string RemoveXmlWhiteSpace(string xml) } """ )!; + + /// + /// Genesis 1:1 in USJ (with additional blank chapters). + /// + public static readonly Usj UsjGen1V1WithBlankChapters = JsonConvert.DeserializeObject( + $$""" + { + type: "{{Usj.UsjType}}", + version: "{{Usj.UsjVersion}}", + content: [ + { type: "book", marker: "id", code: "GEN" }, + { type: "chapter", marker: "c", number: "1", sid: "GEN 1" }, + { type: "verse", marker: "v", number: "1", sid: "GEN 1:1" }, + "In the beginning ", + { type: "verse", marker: "v", number: "2", sid: "GEN 1:2" }, + { type: "chapter", marker: "c", number: "2", sid: "GEN 2" }, + { type: "chapter", marker: "c", number: "3", sid: "GEN 3" }, + ], + } + """ + )!; + + /// + /// Genesis 1:1 in USX (with additional blank chapters). + /// + public static readonly string UsxGen1V1WithBlankChapters = $""" + + + + In the beginning + + + + + """; + + /// + /// Genesis 1:1 in USJ (with blank verses). + /// + public static readonly Usj UsjGen1V1WithBlankVerses = JsonConvert.DeserializeObject( + $$""" + { + type: "{{Usj.UsjType}}", + version: "{{Usj.UsjVersion}}", + content: [ + { type: "book", marker: "id", code: "GEN" }, + { type: "chapter", marker: "c", number: "1", sid: "GEN 1" }, + { type: "verse", marker: "v", number: "1", sid: "GEN 1:1" }, + "In the beginning ", + { type: "verse", marker: "v", number: "2", sid: "GEN 1:2" }, + { type: "verse", marker: "v", number: "3", sid: "GEN 1:3" }, + { type: "verse", marker: "v", number: "4", sid: "GEN 1:4" }, + ], + } + """ + )!; + + /// + /// Genesis 1:1 in USX (with blank verses). + /// + + + public static readonly string UsxGen1V1WithBlankVerses = $""" + + + + In the beginning + + + + + + """; + + /// + /// Genesis 1:1 in USJ (with no sids). + /// + public static readonly Usj UsjGen1V1WithNoSids = JsonConvert.DeserializeObject( + $$""" + { + type: "{{Usj.UsjType}}", + version: "{{Usj.UsjVersion}}", + content: [ + { type: "book", marker: "id", code: "GEN" }, + { type: "chapter", marker: "c", number: "1"}, + { type: "verse", marker: "v", number: "1" }, + "In the beginning ", + { type: "verse", marker: "v", number: "2" }, + { type: "verse", marker: "v", number: "3" }, + { type: "verse", marker: "v", number: "4" }, + ], + } + """ + )!; + + /// + /// Genesis 1:1 in USJ (with no sids). + /// + public static readonly string UsxGen1V1WithNoSids = $""" + + + + In the beginning + + + + """; } diff --git a/test/SIL.Converters.Usj.Tests/UsjToUsxTests.cs b/test/SIL.Converters.Usj.Tests/UsjToUsxTests.cs index b45f3f06e8..ecaf3b8e3e 100644 --- a/test/SIL.Converters.Usj.Tests/UsjToUsxTests.cs +++ b/test/SIL.Converters.Usj.Tests/UsjToUsxTests.cs @@ -1,4 +1,5 @@ using System; +using System.Xml; using NUnit.Framework; namespace SIL.Converters.Usj.Tests; @@ -14,6 +15,15 @@ public void ShouldConvertFromEmptyUsjToUsx() Assert.That(usx, Is.EqualTo(TestData.UsxEmpty)); } + [Test] + public void ShouldConvertFromEmptyUsjToUsx_Roundtrip() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjEmpty); + Usj usj = UsxToUsj.UsxStringToUsj(usx); + Assert.That(usj, Is.EqualTo(TestData.UsjEmpty).UsingPropertiesComparer()); + } + [Test] public void ShouldConvertFromUsjToUsx() { @@ -24,7 +34,7 @@ public void ShouldConvertFromUsjToUsx() } [Test] - public void ShouldConvertFromUsjToUsxAndBack() + public void ShouldConvertFromUsjToUsx_Roundtrip() { var usjToUsx = new UsjToUsx(); string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1); @@ -32,6 +42,64 @@ public void ShouldConvertFromUsjToUsxAndBack() Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1).UsingPropertiesComparer()); } + [Test] + public void ShouldConvertFromUsjToUsxXmlDocument() + { + // Setup + var expectedUsx = new XmlDocument { PreserveWhitespace = true }; + expectedUsx.LoadXml(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1)); + + // SUT + var usjToUsx = new UsjToUsx(); + XmlDocument usx = usjToUsx.UsjToUsxXmlDocument(TestData.UsjGen1V1); + Assert.That(usx, Is.EqualTo(expectedUsx)); + } + + [Test] + public void ShouldConvertFromUsjToUsxXmlDocument_Roundtrip() + { + var usjToUsx = new UsjToUsx(); + XmlDocument usx = usjToUsx.UsjToUsxXmlDocument(TestData.UsjGen1V1); + Usj usj = UsxToUsj.UsxXmlDocumentToUsj(usx); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsjToUsxWithBlankChapters() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1WithBlankChapters); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1WithBlankChapters))); + } + + [Test] + public void ShouldConvertFromUsjToUsxWithBlankChapters_Roundtrip() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1WithBlankChapters); + Usj usj = UsxToUsj.UsxStringToUsj(usx); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1WithBlankChapters).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsjToUsxWithBlankVerses() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1WithBlankVerses); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1WithBlankVerses))); + } + + [Test] + public void ShouldConvertFromUsjToUsxWithBlankVerses_Roundtrip() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1WithBlankVerses); + Usj usj = UsxToUsj.UsxStringToUsj(usx); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1WithBlankVerses).UsingPropertiesComparer()); + } + [Test] public void ShouldConvertFromUsjWithImpliedParagraphsToUsx() { @@ -41,6 +109,15 @@ public void ShouldConvertFromUsjWithImpliedParagraphsToUsx() Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1ImpliedPara))); } + [Test] + public void ShouldConvertFromUsjWithImpliedParagraphsToUsx_Roundtrip() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1ImpliedPara); + Usj usj = UsxToUsj.UsxStringToUsj(usx); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1ImpliedPara).UsingPropertiesComparer()); + } + [Test] public void ShouldConvertFromUsjWithNonStandardFeaturesToUsx() { @@ -50,6 +127,33 @@ public void ShouldConvertFromUsjWithNonStandardFeaturesToUsx() Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1Nonstandard))); } + [Test] + public void ShouldConvertFromUsjWithNonStandardFeaturesToUsx_Roundtrip() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1Nonstandard); + Usj usj = UsxToUsj.UsxStringToUsj(usx); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1Nonstandard).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsjToUsxWithNoSids() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1WithNoSids); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1WithNoSids))); + } + + [Test] + public void ShouldConvertFromUsjToUsxWithNoSids_Roundtrip() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1WithNoSids); + Usj usj = UsxToUsj.UsxStringToUsj(usx); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1WithNoSids).UsingPropertiesComparer()); + } + [Test] public void ShouldConvertFromUsjToUsxWithSpecialWhiteSpace() { @@ -59,6 +163,15 @@ public void ShouldConvertFromUsjToUsxWithSpecialWhiteSpace() Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1Whitespace))); } + [Test] + public void ShouldConvertFromUsjToUsxWithSpecialWhiteSpace_Roundtrip() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1Whitespace); + Usj usj = UsxToUsj.UsxStringToUsj(usx); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1Whitespace).UsingPropertiesComparer()); + } + [Test] public void ShouldConvertFromUsjToUsxWithTable() { @@ -68,6 +181,15 @@ public void ShouldConvertFromUsjToUsxWithTable() Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1WithTable))); } + [Test] + public void ShouldConvertFromUsjToUsxWithTable_Roundtrip() + { + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(TestData.UsjGen1V1WithTable); + Usj usj = UsxToUsj.UsxStringToUsj(usx); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1WithTable).UsingPropertiesComparer()); + } + [Test] public void ShouldNotAllowInvalidContent() { diff --git a/test/SIL.Converters.Usj.Tests/UsxToUsjTests.cs b/test/SIL.Converters.Usj.Tests/UsxToUsjTests.cs index c68872ed1f..9af09acb89 100644 --- a/test/SIL.Converters.Usj.Tests/UsxToUsjTests.cs +++ b/test/SIL.Converters.Usj.Tests/UsxToUsjTests.cs @@ -1,3 +1,4 @@ +using System; using System.Xml; using NUnit.Framework; @@ -14,16 +15,19 @@ public void ShouldConvertFromEmptyUsxToUsj() } [Test] - public void ShouldConvertFromNullToUsj() + public void ShouldConvertFromEmptyUsxToUsj_Roundtrip() { - Usj usj = UsxToUsj.UsxStringToUsj(null); - Assert.That(usj, Is.EqualTo(TestData.UsjEmpty).UsingPropertiesComparer()); + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxEmpty); + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(usj); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxEmpty))); } [Test] - public void ShouldConvertFromNullXmlDocumentToUsj() + public void ShouldConvertFromNullToUsj() { - Usj usj = UsxToUsj.UsxXmlDocumentToUsj(null); + Usj usj = UsxToUsj.UsxStringToUsj(null); Assert.That(usj, Is.EqualTo(TestData.UsjEmpty).UsingPropertiesComparer()); } @@ -34,6 +38,16 @@ public void ShouldConvertFromUsxToUsj() Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1).UsingPropertiesComparer()); } + [Test] + public void ShouldConvertFromUsxToUsj_Roundtrip() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1); + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(usj); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1))); + } + [Test] public void ShouldConvertFromUsxToUsjAndRemoveSpecificAttributes() { @@ -41,6 +55,51 @@ public void ShouldConvertFromUsxToUsjAndRemoveSpecificAttributes() Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1).UsingPropertiesComparer()); } + [Test] + public void ShouldConvertFromUsxToUsjAndRemoveSpecificAttributes_Roundtrip() + { + // NOTE: We do not compare with the original, as invalid attributes are removed + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1WithAttributesToRemove); + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(usj); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1))); + } + + [Test] + public void ShouldConvertFromUsxToUsjWithBlankChapters() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1WithBlankChapters); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1WithBlankChapters).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsxToUsjWithBlankChapters_Roundtrip() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1WithBlankChapters); + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(usj); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1WithBlankChapters))); + } + + [Test] + public void ShouldConvertFromUsxToUsjWithBlankVerses() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1WithBlankVerses); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1WithBlankVerses).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsxToUsjWithBlankVerses_Roundtrip() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1WithBlankVerses); + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(usj); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1WithBlankVerses))); + } + [Test] public void ShouldConvertFromUsxToUsjWithImpliedParagraphs() { @@ -48,6 +107,16 @@ public void ShouldConvertFromUsxToUsjWithImpliedParagraphs() Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1ImpliedPara).UsingPropertiesComparer()); } + [Test] + public void ShouldConvertFromUsxToUsjWithImpliedParagraphs_Roundtrip() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1ImpliedPara); + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(usj); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1ImpliedPara))); + } + [Test] public void ShouldConvertFromUsxToUsjWithSpecialWhiteSpace() { @@ -55,6 +124,16 @@ public void ShouldConvertFromUsxToUsjWithSpecialWhiteSpace() Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1Whitespace).UsingPropertiesComparer()); } + [Test] + public void ShouldConvertFromUsxToUsjWithSpecialWhiteSpace_Roundtrip() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1Whitespace); + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(usj); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1Whitespace))); + } + [Test] public void ShouldConvertFromUsxToUsjWithNonStandardFeatures() { @@ -62,6 +141,33 @@ public void ShouldConvertFromUsxToUsjWithNonStandardFeatures() Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1Nonstandard).UsingPropertiesComparer()); } + [Test] + public void ShouldConvertFromUsxToUsjWithNonStandardFeatures_RoundTrip() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1Nonstandard); + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(usj); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1Nonstandard))); + } + + [Test] + public void ShouldConvertFromUsxToUsjWithNoSids() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1WithNoSids); + Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1WithNoSids).UsingPropertiesComparer()); + } + + [Test] + public void ShouldConvertFromUsxToUsjWithNoSids_Roundtrip() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1WithNoSids); + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(usj); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1WithNoSids))); + } + [Test] public void ShouldConvertFromUsxToUsjWithTable() { @@ -69,6 +175,16 @@ public void ShouldConvertFromUsxToUsjWithTable() Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1WithTable).UsingPropertiesComparer()); } + [Test] + public void ShouldConvertFromUsxToUsjWithTable_Roundtrip() + { + Usj usj = UsxToUsj.UsxStringToUsj(TestData.UsxGen1V1WithTable); + var usjToUsx = new UsjToUsx(); + string usx = usjToUsx.UsjToUsxString(usj); + usx = TestData.RemoveXmlWhiteSpace(usx); + Assert.That(usx, Is.EqualTo(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1WithTable))); + } + [Test] public void ShouldConvertFromXmlDocumentToUsj() { @@ -77,4 +193,23 @@ public void ShouldConvertFromXmlDocumentToUsj() Usj usj = UsxToUsj.UsxXmlDocumentToUsj(document); Assert.That(usj, Is.EqualTo(TestData.UsjGen1V1).UsingPropertiesComparer()); } + + [Test] + public void ShouldConvertFromXmlDocumentToUsj_Roundtrip() + { + XmlDocument document = new XmlDocument { PreserveWhitespace = true }; + document.LoadXml(TestData.RemoveXmlWhiteSpace(TestData.UsxGen1V1)); + Usj usj = UsxToUsj.UsxXmlDocumentToUsj(document); + var usjToUsx = new UsjToUsx(); + XmlDocument usx = usjToUsx.UsjToUsxXmlDocument(usj); + Assert.That(usx, Is.EqualTo(document).UsingPropertiesComparer()); + } + + [Test] + public void ShouldNotConvertFromNullXmlDocumentToUsj() => + Assert.Throws(() => UsxToUsj.UsxXmlDocumentToUsj(null)); + + [Test] + public void ShouldNotConvertFromNonPreserveWhitespaceXmlDocumentToUsj() => + Assert.Throws(() => UsxToUsj.UsxXmlDocumentToUsj(new XmlDocument())); } From c80505d675b375c8f5b17a5e61a768348c4b4145 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Mon, 10 Feb 2025 09:11:15 +1300 Subject: [PATCH 4/4] Code review updates --- src/SIL.Converters.Usj/UsjToUsx.cs | 47 +++++++++++++---------- src/SIL.Converters.Usj/UsxToUsj.cs | 8 ++-- test/SIL.Converters.Usj.Tests/TestData.cs | 2 +- 3 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/SIL.Converters.Usj/UsjToUsx.cs b/src/SIL.Converters.Usj/UsjToUsx.cs index 2392a0348c..700a95ebb7 100644 --- a/src/SIL.Converters.Usj/UsjToUsx.cs +++ b/src/SIL.Converters.Usj/UsjToUsx.cs @@ -16,20 +16,27 @@ namespace SIL.Converters.Usj /// public class UsjToUsx { - private string _chapterEid; - private string _verseEid; + /// + /// The eid which will be written for the current chapter. + /// + private string _currentChapterEid; + + /// + /// The eid which will be written for the current verse. + /// + private string _currentVerseEid; private XmlElement CreateVerseEndElement(XmlDocument usxDoc) { XmlElement eidElement = usxDoc.CreateElement("verse"); - eidElement.SetAttribute("eid", _verseEid); + eidElement.SetAttribute("eid", _currentVerseEid); return eidElement; } private XmlElement CreateChapterEndElement(XmlDocument usxDoc) { XmlElement eidElement = usxDoc.CreateElement("chapter"); - eidElement.SetAttribute("eid", _chapterEid); + eidElement.SetAttribute("eid", _currentChapterEid); return eidElement; } @@ -110,42 +117,42 @@ bool isLastItem throw new ArgumentOutOfRangeException(nameof(markerContent)); } - // Store the previous verse eid so we can close them in the correct place + // Store the previous verse eid so we can close that verse in the correct place string lastVerseEid = null; // Create chapter and verse end elements from SID attributes. - if (_verseEid != null && (type == "verse" || (parentElement.Name == "para" && isLastItem))) + if (_currentVerseEid != null && (type == "verse" || (parentElement.Name == "para" && isLastItem))) { eidElement = CreateVerseEndElement(usxDoc); - lastVerseEid = _verseEid; - _verseEid = null; + lastVerseEid = _currentVerseEid; + _currentVerseEid = null; } if (type == "verse" && usjMarker?.Sid != null) { - _verseEid = usjMarker.Sid; + _currentVerseEid = usjMarker.Sid; } - if (_chapterEid != null && (type == "chapter" || (type == "para" && isLastItem))) + if (_currentChapterEid != null && (type == "chapter" || (type == "para" && isLastItem))) { eidElement = CreateChapterEndElement(usxDoc); - _chapterEid = null; + _currentChapterEid = null; } if (type == "chapter" && usjMarker?.Sid != null) { - _chapterEid = usjMarker.Sid; + _currentChapterEid = usjMarker.Sid; } // See if we are at a new verse - if (eidElement != null && isLastItem && _verseEid != null && _verseEid != lastVerseEid) + if (eidElement != null && isLastItem && _currentVerseEid != null && _currentVerseEid != lastVerseEid) { // Write the eid element for the previous verse parentElement.AppendChild(eidElement); // Ensure that eid element for the current verse is not written eidElement = null; - _verseEid = null; + _currentVerseEid = null; } // Append to parent to close the verse or chapter before this new node @@ -166,18 +173,18 @@ bool isLastItem // Allow for final chapter and verse end elements at the end of an implied para. if (isLastItem && parentElement.Name == Usx.UsxType) { - if (_verseEid != null) + if (_currentVerseEid != null) { parentElement.AppendChild(CreateVerseEndElement(usxDoc)); } - if (_chapterEid != null) + if (_currentChapterEid != null) { parentElement.AppendChild(CreateChapterEndElement(usxDoc)); } - _verseEid = null; - _chapterEid = null; + _currentVerseEid = null; + _currentChapterEid = null; } } @@ -198,8 +205,8 @@ private void UsjToUsxDom(IUsj usj, XmlDocument usxDoc) public XmlDocument UsjToUsxXmlDocument(IUsj usj) { // Reset any instance variables - _chapterEid = null; - _verseEid = null; + _currentChapterEid = null; + _currentVerseEid = null; // Create the USX document XmlDocument usxDoc = new XmlDocument { PreserveWhitespace = true }; diff --git a/src/SIL.Converters.Usj/UsxToUsj.cs b/src/SIL.Converters.Usj/UsxToUsj.cs index ced7db931f..61ccb5864b 100644 --- a/src/SIL.Converters.Usj/UsxToUsj.cs +++ b/src/SIL.Converters.Usj/UsxToUsj.cs @@ -60,20 +60,20 @@ private static (T, bool) UsxDomToUsjRecurse(XmlElement usxElement) // Set the attributes, placing unknown attributes in the Json Extension Data // This code implements the Typescript code: outObj = { ...outObj, ...attributes }; - foreach (KeyValuePair attrib in attributes) + foreach (KeyValuePair attribute in attributes) { PropertyInfo property = outObj .GetType() - .GetProperty(attrib.Key, BindingFlags.Public | BindingFlags.Instance | BindingFlags.IgnoreCase); + .GetProperty(attribute.Key, BindingFlags.Public | BindingFlags.Instance | BindingFlags.IgnoreCase); if (property != null && property.CanWrite) { // Set the property if it exists - property.SetValue(outObj, attrib.Value); + property.SetValue(outObj, attribute.Value); } else { // Add to the Json Extension Data if the property does not exist - outObj.AdditionalData[attrib.Key] = attrib.Value; + outObj.AdditionalData[attribute.Key] = attribute.Value; } } diff --git a/test/SIL.Converters.Usj.Tests/TestData.cs b/test/SIL.Converters.Usj.Tests/TestData.cs index 59bedc7711..84f66b38ff 100644 --- a/test/SIL.Converters.Usj.Tests/TestData.cs +++ b/test/SIL.Converters.Usj.Tests/TestData.cs @@ -453,7 +453,7 @@ public static string RemoveXmlWhiteSpace(string xml) )!; /// - /// Genesis 1:1 in USJ (with no sids). + /// Genesis 1:1 in USX (with no sids). /// public static readonly string UsxGen1V1WithNoSids = $"""