From 72076d17d2d2d6a0db54aa168366fa9a6ab646e0 Mon Sep 17 00:00:00 2001 From: Jaro Date: Mon, 12 Jun 2023 16:47:25 +0200 Subject: [PATCH] Internally mark hard line breaks. --- .../Text/ParagraphLayout/Internal/Layout.hs | 27 ++++++++++++++----- .../ParagraphLayout/Internal/ProtoFragment.hs | 8 +++--- .../Text/ParagraphLayout/Internal/Rich.hs | 16 +++++++---- src/Data/Text/ParagraphLayout/Internal/Run.hs | 5 ++++ .../Text/ParagraphLayout/Internal/RunSpec.hs | 24 +++++++++++++++++ 5 files changed, 66 insertions(+), 14 deletions(-) diff --git a/src/Data/Text/ParagraphLayout/Internal/Layout.hs b/src/Data/Text/ParagraphLayout/Internal/Layout.hs index 10b0e4b..bf7ccf6 100644 --- a/src/Data/Text/ParagraphLayout/Internal/Layout.hs +++ b/src/Data/Text/ParagraphLayout/Internal/Layout.hs @@ -246,12 +246,22 @@ lastSpanBoxes xs = case reverse xs of -- output list will contain a run of zero characters. This can be used to -- correctly size an empty line. -- +-- If there is a hard line break in the input, the run containing it will have +-- its `runHardBreak` set to `True`. +-- -- If there is no hard line break in the input, the first output list will -- contain the whole input, and the second output list will be empty. hardSplit :: NonEmpty (WithSpan d Run) -> ([WithSpan d Run], [WithSpan d Run]) -hardSplit runs = allowFstEmpty $ trimFst $ NonEmpty.last $ splits +hardSplit runs = case reverse hSplits of + [] -> noSplit + (splitRuns : _) -> forcedSplit splitRuns where - trimFst (runs1, runs2) = (trim runs1, runs2) + noSplit = + (NonEmpty.toList (trim runs), []) + forcedSplit (runs1, runs2) = + (NonEmpty.toList $ markHard $ trim runs1, runs2) + markHard = mapLast markHard' + markHard' (WithSpan rs x) = WithSpan rs x { runHardBreak = True } trim = trimTextsStartPreserve isStartSpace . trimTextsEndPreserve isEndSpace @@ -259,13 +269,17 @@ hardSplit runs = allowFstEmpty $ trimFst $ NonEmpty.last $ splits -- TODO: Consider optimising. -- We do not need to look for any line breaks further than the -- shortest hard break. - splits = noSplit :| hSplits - noSplit = (runs, []) hSplits = nonEmptyFsts $ -- from longest to shortest splitTextsBy (map fst . filter isHard . runLineBreaks) runs isHard (_, status) = status == BreakStatus.Hard +-- | Apply a function to the last element of the non-empty list. +mapLast :: (a -> a) -> NonEmpty a -> NonEmpty a +mapLast f xs = case NonEmpty.uncons xs of + (x, Nothing) -> f x :| [] + (x, Just rest) -> NonEmpty.cons x $ mapLast f rest + -- | Treat a list of runs as a contiguous sequence, -- and find all possible ways to split them into two non-empty lists, -- using soft line break opportunities (typically after words) and then @@ -312,10 +326,11 @@ layoutRunsH runs = map layoutRunH runs layoutRunH :: WithSpan d Run -> ProtoFragmentWithSpan d layoutRunH (WithSpan rs run) = WithSpan rs pf where - pf = PF.protoFragmentH dir lvl glyphs + pf = PF.protoFragmentH dir lvl glyphs hard glyphs = shapeRun (WithSpan rs run) dir = runDirection run lvl = runLevel run + hard = runHardBreak run -- | Calculate layout for the given run independently of its position. shapeRun :: WithSpan d Run -> [(GlyphInfo, GlyphPos)] @@ -356,7 +371,7 @@ runBreaksFromSpan :: Run -> [(Int, a)] -> [(Int, a)] runBreaksFromSpan run spanBreaks = dropWhile (not . valid) $ subOffsetsDesc (runOffsetInSpan run) spanBreaks where - valid (off, _) = off < runLength + valid (off, _) = off <= runLength runLength = lengthWord8 $ getText run -- | Predicate for characters that can be potentially removed from the diff --git a/src/Data/Text/ParagraphLayout/Internal/ProtoFragment.hs b/src/Data/Text/ParagraphLayout/Internal/ProtoFragment.hs index 8d85cfe..fd3dd3d 100644 --- a/src/Data/Text/ParagraphLayout/Internal/ProtoFragment.hs +++ b/src/Data/Text/ParagraphLayout/Internal/ProtoFragment.hs @@ -1,5 +1,5 @@ module Data.Text.ParagraphLayout.Internal.ProtoFragment - ( ProtoFragment (direction, advance, glyphs) + ( ProtoFragment (direction, advance, glyphs, hardBreak) , protoFragmentH ) where @@ -20,13 +20,15 @@ data ProtoFragment = ProtoFragment -- ^ Total advance of glyphs in this fragment, -- depending on the text direction. , glyphs :: [(GlyphInfo, GlyphPos)] + , hardBreak :: Bool + -- ^ Marks fragment that ends with a forced line break. } -- | Construct a `ProtoFragment`, automatically calculating the total advance -- for a horizontal text direction. -protoFragmentH :: Direction -> BiDi.Level -> [(GlyphInfo, GlyphPos)] -> +protoFragmentH :: Direction -> BiDi.Level -> [(GlyphInfo, GlyphPos)] -> Bool -> ProtoFragment -protoFragmentH dir lvl gs = ProtoFragment dir lvl adv gs +protoFragmentH dir lvl gs hard = ProtoFragment dir lvl adv gs hard where adv = sum $ map (x_advance . snd) gs diff --git a/src/Data/Text/ParagraphLayout/Internal/Rich.hs b/src/Data/Text/ParagraphLayout/Internal/Rich.hs index 12121cb..96f0212 100644 --- a/src/Data/Text/ParagraphLayout/Internal/Rich.hs +++ b/src/Data/Text/ParagraphLayout/Internal/Rich.hs @@ -66,8 +66,8 @@ resolveSpans p = do let lang = textLanguage textOpts -- TODO: Allow BiDi embedding/isolation for inner nodes. let pLevels = textLevels (textDirection rootTextOpts) pText - let lBreaks = paragraphBreaks breakLine pText lang - let cBreaks = paragraphBreaks breakCharacter pText lang + let lBreaks = paragraphBreaksDesc breakLine pText lang + let cBreaks = paragraphBreaksDesc breakCharacter pText lang -- TODO: Optimise. This has time complexity O(n*s), where n is number of -- characters and s is number of resolved spans. -- Maybe include byte offsets in the TextLevels data structure? @@ -85,6 +85,12 @@ resolveSpans p = do , RS.spanCharacterBreaks = subOffsetsDesc (sStart - pStart) cBreaks } -paragraphBreaks :: (LocaleName -> Breaker a) -> Text -> String -> [(Int, a)] -paragraphBreaks breakFunc txt lang = - breaksDesc (breakFunc (locale lang LBAuto)) txt +paragraphBreaksDesc :: (LocaleName -> Breaker a) -> Text -> String -> [(Int, a)] +paragraphBreaksDesc breakFunc txt lang = + -- Workaround: We are interested in the type of the end-of-text break + -- (if it is hard, that line needs to be always visible), + -- but `breaksDesc` does not provide it. + -- + -- TODO: Consider optimising by creating a custom reimplementation + -- of `Data.Text.ICU.breaksRight`. + reverse $ breaksAsc (breakFunc (locale lang LBAuto)) txt diff --git a/src/Data/Text/ParagraphLayout/Internal/Run.hs b/src/Data/Text/ParagraphLayout/Internal/Run.hs index ae3c245..5f118bd 100644 --- a/src/Data/Text/ParagraphLayout/Internal/Run.hs +++ b/src/Data/Text/ParagraphLayout/Internal/Run.hs @@ -25,6 +25,10 @@ data Run = Run , runLevel :: Level , runDirection :: Direction , runScript :: Maybe ScriptCode + , runHardBreak :: Bool + -- ^ Marks run that ends with a forced line break. + -- Those should prevent creation of invisible line boxes + -- according to . } deriving (Eq, Show) @@ -87,6 +91,7 @@ spanToRuns s = snd $ mapAccumL run 0 $ protoRuns zipper levels , runLevel = PR.level pr , runDirection = levelDirectionH $ PR.level pr , runScript = Just $ PR.script pr + , runHardBreak = False } ) diff --git a/test/Data/Text/ParagraphLayout/Internal/RunSpec.hs b/test/Data/Text/ParagraphLayout/Internal/RunSpec.hs index 355cb88..0ad2846 100644 --- a/test/Data/Text/ParagraphLayout/Internal/RunSpec.hs +++ b/test/Data/Text/ParagraphLayout/Internal/RunSpec.hs @@ -60,6 +60,7 @@ spec = do , runLevel = 0 , runDirection = DirLTR , runScript = Just "Latn" + , runHardBreak = False } ] it "handles Arabic hello" $ do @@ -72,6 +73,7 @@ spec = do , runLevel = 1 , runDirection = DirRTL , runScript = Just "Arab" + , runHardBreak = False } ] it "handles Serbian with mixed script" $ do @@ -85,6 +87,7 @@ spec = do , runLevel = 0 , runDirection = DirLTR , runScript = Just "Latn" + , runHardBreak = False } , Run { runOffsetInSpan = 12 @@ -92,6 +95,7 @@ spec = do , runLevel = 0 , runDirection = DirLTR , runScript = Just "Cyrl" + , runHardBreak = False } ] it "handles mixed direction with base LTR" $ do @@ -105,6 +109,7 @@ spec = do , runLevel = 0 , runDirection = DirLTR , runScript = Just "Latn" + , runHardBreak = False } , Run { runOffsetInSpan = 7 @@ -112,6 +117,7 @@ spec = do , runLevel = 1 , runDirection = DirRTL , runScript = Just "Arab" + , runHardBreak = False } , Run { runOffsetInSpan = 13 @@ -119,6 +125,7 @@ spec = do , runLevel = 0 , runDirection = DirLTR , runScript = Just "Latn" + , runHardBreak = False } ] it "handles mixed direction with base RTL" $ do @@ -132,6 +139,7 @@ spec = do , runLevel = 2 , runDirection = DirLTR , runScript = Just "Latn" + , runHardBreak = False } , Run { runOffsetInSpan = 7 @@ -139,6 +147,7 @@ spec = do , runLevel = 1 , runDirection = DirRTL , runScript = Just "Arab" + , runHardBreak = False } , Run { runOffsetInSpan = 13 @@ -146,6 +155,7 @@ spec = do , runLevel = 2 , runDirection = DirLTR , runScript = Just "Latn" + , runHardBreak = False } ] it "handles Arabic text with English inside" $ do @@ -159,6 +169,7 @@ spec = do , runLevel = 1 , runDirection = DirRTL , runScript = Just "Arab" + , runHardBreak = False } , Run { runOffsetInSpan = 5 @@ -166,6 +177,7 @@ spec = do , runLevel = 2 , runDirection = DirLTR , runScript = Just "Latn" + , runHardBreak = False } , Run { runOffsetInSpan = 14 @@ -173,6 +185,7 @@ spec = do , runLevel = 1 , runDirection = DirRTL , runScript = Just "Arab" + , runHardBreak = False } , Run { runOffsetInSpan = 79 @@ -180,6 +193,7 @@ spec = do , runLevel = 2 , runDirection = DirLTR , runScript = Just "Latn" + , runHardBreak = False } , Run { runOffsetInSpan = 82 @@ -187,6 +201,7 @@ spec = do , runLevel = 1 , runDirection = DirRTL , runScript = Just "Zyyy" + , runHardBreak = False } ] it "handles English text with Arabic inside" $ do @@ -200,6 +215,7 @@ spec = do , runLevel = 0 , runDirection = DirLTR , runScript = Just "Latn" + , runHardBreak = False } , Run { runOffsetInSpan = 13 @@ -207,6 +223,7 @@ spec = do , runLevel = 1 , runDirection = DirRTL , runScript = Just "Arab" + , runHardBreak = False } , Run { runOffsetInSpan = 47 @@ -214,6 +231,7 @@ spec = do , runLevel = 0 , runDirection = DirLTR , runScript = Just "Latn" + , runHardBreak = False } ] -- Unrealistic example where text changes direction @@ -229,6 +247,7 @@ spec = do , runLevel = 1 , runDirection = DirRTL , runScript = Just "Latn" + , runHardBreak = False } -- direction change , Run @@ -237,6 +256,7 @@ spec = do , runLevel = 2 , runDirection = DirLTR , runScript = Just "Latn" + , runHardBreak = False } -- direction change , Run @@ -245,6 +265,7 @@ spec = do , runLevel = 1 , runDirection = DirRTL , runScript = Just "Latn" + , runHardBreak = False } -- script change , Run @@ -253,6 +274,7 @@ spec = do , runLevel = 1 , runDirection = DirRTL , runScript = Just "Cyrl" + , runHardBreak = False } -- direction change , Run @@ -261,6 +283,7 @@ spec = do , runLevel = 2 , runDirection = DirLTR , runScript = Just "Cyrl" + , runHardBreak = False } -- direction change , Run @@ -269,5 +292,6 @@ spec = do , runLevel = 1 , runDirection = DirRTL , runScript = Just "Cyrl" + , runHardBreak = False } ] -- 2.30.2