Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exact printer #9436

Draft
wants to merge 41 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
d12c1cd
Initial draft exact printer
jappeace Nov 5, 2023
f434639
add section
jappeace Nov 13, 2023
c36bc19
Add some examples on where we fail
jappeace Nov 18, 2023
5f9795f
I think two of these may not be neccisary
jappeace Nov 18, 2023
ab66f2c
figured out where comments are deleted
jappeace Nov 18, 2023
1c7d4b8
add just a single comment test, add printing of sections
jappeace Nov 18, 2023
2d6ce6b
can't have exposed modules in executable
jappeace Nov 18, 2023
f624f00
add more tests
jappeace Nov 18, 2023
9462773
make the sections test pass
jappeace Nov 18, 2023
742d750
add more failing examples
jappeace Nov 19, 2023
fcef3d1
add comments on common stanza's
jappeace Nov 19, 2023
46fccd8
better location of comment
jappeace Nov 19, 2023
d752e49
Add comments and whitespace tokens to the lexer
andreabedini Nov 18, 2023
15c2aea
nothunks
jappeace Jan 1, 2024
c5780e0
Revert "Add comments and whitespace tokens to the lexer"
jappeace Jan 1, 2024
a75d51b
add comma test
jappeace Jan 20, 2024
f2608ad
enable comment test
jappeace Jun 8, 2024
90d148c
Add comments and whitespace tokens to the lexer
andreabedini Nov 18, 2023
bcc1460
pain
jappeace Jun 8, 2024
dcaf832
clear traces
jappeace Jun 8, 2024
5518088
re-add ilevel
jappeace Jun 8, 2024
659e3ff
add more test cases
jappeace Jun 9, 2024
66253e0
No dot emit a Whitespace token with a single newline
andreabedini Jun 9, 2024
8edf0ef
parser 2
jappeace Jun 9, 2024
536e46f
add more parser tests
jappeace Jun 9, 2024
4670fe0
add comment parsing
jappeace Jun 9, 2024
268daff
re-add warning on openeing brace
jappeace Jun 9, 2024
1a40cc9
re-add token openbrace
jappeace Jun 9, 2024
2fa195d
trace the choice
jappeace Jun 9, 2024
5dd4eda
fix more whitespace issues
jappeace Jun 9, 2024
a507209
make a better parser
jappeace Jun 9, 2024
9355259
fix spacing issues
jappeace Jun 9, 2024
75242a1
add todo
jappeace Jun 9, 2024
b66779e
capture the comments
jappeace Jun 10, 2024
824d85c
setup basic printing
jappeace Jun 10, 2024
01290c7
add parsing of the comments in the tree per element
jappeace Jun 10, 2024
5559385
make it print comments
jappeace Jun 10, 2024
be7b28e
fix field binding
jappeace Jun 10, 2024
e86b906
remove superflous todo
jappeace Jun 11, 2024
31244e5
fix errorring
jappeace Aug 7, 2024
a870ae4
add conditional test
jappeace Aug 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cabal-syntax/Cabal-syntax.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ library
Distribution.Utils.String
Distribution.Utils.Structured
Distribution.Version
Distribution.PackageDescription.ExactPrint
Language.Haskell.Extension

other-extensions:
Expand Down
64 changes: 53 additions & 11 deletions Cabal-syntax/src/Distribution/Fields/Field.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE StrictData #-}

-- | Cabal-like file AST types: 'Field', 'Section' etc
--
Expand All @@ -12,11 +14,13 @@ module Distribution.Fields.Field
, fieldName
, fieldAnn
, fieldUniverse
, fieldMeta
, FieldLine (..)
, fieldLineAnn
, fieldLineBS
, SectionArg (..)
, sectionArgAnn
, sectionArgContent

-- * Name
, FieldName
Expand All @@ -28,6 +32,12 @@ module Distribution.Fields.Field
-- * Conversions to String
, sectionArgsToString
, fieldLinesToString

-- * meta data
, MetaField(..)
, fieldMeta
, metaComment
, metaAnn
) where

import Data.ByteString (ByteString)
Expand All @@ -47,30 +57,55 @@ import qualified Data.Foldable1 as F1

-- | A Cabal-like file consists of a series of fields (@foo: bar@) and sections (@library ...@).
data Field ann
= Field !(Name ann) [FieldLine ann]
| Section !(Name ann) [SectionArg ann] [Field ann]
= Field (Name ann) [FieldLine ann]
| Section (Name ann) [SectionArg ann] [Field ann]
| Meta (MetaField ann)
deriving (Eq, Show, Functor, Foldable, Traversable)

data MetaField ann = MetaComment ann ByteString
| MetaWhitespace ann ByteString
deriving (Eq, Show, Functor, Foldable, Traversable)

metaComment :: MetaField ann -> Maybe ByteString
metaComment = \case
(MetaComment _ bs) -> Just bs
(MetaWhitespace _ _) -> Nothing

metaAnn :: MetaField ann -> ann
metaAnn = \case
(MetaComment ann _) -> ann
(MetaWhitespace ann _) -> ann

-- | Section of field name
fieldName :: Field ann -> Name ann
fieldName (Field n _) = n
fieldName (Section n _ _) = n
fieldName :: Field ann -> (Maybe (Name ann))
fieldName (Field n _) = Just n
fieldName (Section n _ _) = Just n
fieldName (Meta _) = Nothing

fieldMeta :: Field ann -> Maybe (MetaField ann)
fieldMeta = \case
(Field n _) -> Nothing
(Section n _ _) -> Nothing
(Meta x) -> Just x

fieldAnn :: Field ann -> ann
fieldAnn = nameAnn . fieldName
fieldAnn (Field n _) = nameAnn n
fieldAnn (Section n _ _) = nameAnn n
fieldAnn (Meta x) = metaAnn x

-- | All transitive descendants of 'Field', including itself.
--
-- /Note:/ the resulting list is never empty.
fieldUniverse :: Field ann -> [Field ann]
fieldUniverse f@(Section _ _ fs) = f : concatMap fieldUniverse fs
fieldUniverse f@(Field _ _) = [f]
fieldUniverse (Meta _) = []

-- | A line of text representing the value of a field from a Cabal file.
-- A field may contain multiple lines.
--
-- /Invariant:/ 'ByteString' has no newlines.
data FieldLine ann = FieldLine !ann !ByteString
data FieldLine ann = FieldLine ann ByteString
deriving (Eq, Show, Functor, Foldable, Traversable)

-- | @since 3.0.0.0
Expand All @@ -84,11 +119,11 @@ fieldLineBS (FieldLine _ bs) = bs
-- | Section arguments, e.g. name of the library
data SectionArg ann
= -- | identifier, or something which looks like number. Also many dot numbers, i.e. "7.6.3"
SecArgName !ann !ByteString
SecArgName ann ByteString
| -- | quoted string
SecArgStr !ann !ByteString
SecArgStr ann ByteString
| -- | everything else, mm. operators (e.g. in if-section conditionals)
SecArgOther !ann !ByteString
SecArgOther ann ByteString
deriving (Eq, Show, Functor, Foldable, Traversable)

-- | Extract annotation from 'SectionArg'.
Expand All @@ -97,6 +132,12 @@ sectionArgAnn (SecArgName ann _) = ann
sectionArgAnn (SecArgStr ann _) = ann
sectionArgAnn (SecArgOther ann _) = ann

sectionArgContent :: SectionArg ann -> ByteString
sectionArgContent = \case
SecArgName _ann bs -> bs
SecArgStr _ann bs -> bs
SecArgOther _ann bs -> bs

-------------------------------------------------------------------------------
-- Name
-------------------------------------------------------------------------------
Expand All @@ -106,7 +147,7 @@ type FieldName = ByteString
-- | A field name.
--
-- /Invariant/: 'ByteString' is lower-case ASCII.
data Name ann = Name !ann !FieldName
data Name ann = Name ann FieldName
deriving (Eq, Show, Functor, Foldable, Traversable)

mkName :: ann -> FieldName -> Name ann
Expand Down Expand Up @@ -158,6 +199,7 @@ instance F1.Foldable1 Field where
F1.fold1 (F1.foldMap1 f x :| map (F1.foldMap1 f) ys)
foldMap1 f (Section x ys zs) =
F1.fold1 (F1.foldMap1 f x :| map (F1.foldMap1 f) ys ++ map (F1.foldMap1 f) zs)
foldMap1 f (Meta x) = f $ metaAnn x

-- | @since 3.12.0.0
instance F1.Foldable1 FieldLine where
Expand Down
136 changes: 85 additions & 51 deletions Cabal-syntax/src/Distribution/Fields/Lexer.x
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#endif
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
module Distribution.Fields.Lexer
(ltest, lexToken, Token(..), LToken(..)
(ltest, lexString, lexByteString, lexToken, Token(..), LToken(..)
,bol_section, in_section, in_field_layout, in_field_braces
,mkLexState) where

Expand Down Expand Up @@ -82,85 +82,102 @@ tokens :-
}

<bol_section, bol_field_layout, bol_field_braces> {
@nbspspacetab* @nl { \pos len inp -> checkWhitespace pos len inp >> adjustPos retPos >> lexToken }
-- no @nl here to allow for comments on last line of the file with no trailing \n
$spacetab* "--" $comment* ; -- TODO: check the lack of @nl works here
-- including counting line numbers
@nbspspacetab* @nl { \pos len inp -> do
_ <- checkWhitespace pos len inp
adjustPos retPos
toki Whitespace pos len inp }
-- FIXME: no @nl here to allow for comments on last line of the file with no trailing \n
-- FIXME: TODO: check the lack of @nl works here including counting line numbers
$spacetab* "--" $comment* { toki Comment }
}

<bol_section> {
@nbspspacetab* { \pos len inp -> checkLeadingWhitespace pos len inp >>= \len' ->
-- len' is character whitespace length (counting nbsp as one)
if B.length inp == len
then return (L pos EOF)
else do
-- Small hack: if char and byte length mismatch
-- subtract the difference, so lexToken will count position correctly.
-- Proper (and slower) fix is to count utf8 length in lexToken
when (len' /= len) $ adjustPos (incPos (len' - len))
setStartCode in_section
return (L pos (Indent len')) }
@nbspspacetab* { \pos len inp -> do
len' <- checkLeadingWhitespace pos len inp
-- len' is character whitespace length (counting nbsp as one)
if B.length inp == len
then return (L pos EOF)
else do
-- Small hack: if char and byte length mismatch
-- subtract the difference, so lexToken will count position correctly.
-- Proper (and slower) fix is to count utf8 length in lexToken
when (len' /= len) $ adjustPos (incPos (len' - len))
setStartCode in_section
return (L pos (Indent len')) }
$spacetab* \{ { tok OpenBrace }
$spacetab* \} { tok CloseBrace }
}

<in_section> {
$spacetab+ ; --TODO: don't allow tab as leading space

"--" $comment* ;

@name { toki TokSym }
@string { \pos len inp -> return $! L pos (TokStr (B.take (len - 2) (B.tail inp))) }
@oplike { toki TokOther }
$paren { toki TokOther }
\: { tok Colon }
\{ { tok OpenBrace }
\} { tok CloseBrace }
@nl { \_ _ _ -> adjustPos retPos >> setStartCode bol_section >> lexToken }
--TODO: don't allow tab as leading space
$spacetab+ { toki Whitespace }

"--" $comment* { toki Comment }

@name { toki TokSym }
@string { \pos len inp -> return $! L pos (TokStr (B.take (len - 2) (B.tail inp))) }
@oplike { toki TokOther }
$paren { toki TokOther }
\: { tok Colon }
\{ { tok OpenBrace }
\} { tok CloseBrace }
@nl { \pos len inp -> do
adjustPos retPos
setStartCode bol_section
lexToken }
}

<bol_field_layout> {
@nbspspacetab* { \pos len inp -> checkLeadingWhitespace pos len inp >>= \len' ->
if B.length inp == len
then return (L pos EOF)
else do
-- Small hack: if char and byte length mismatch
-- subtract the difference, so lexToken will count position correctly.
-- Proper (and slower) fix is to count utf8 length in lexToken
when (len' /= len) $ adjustPos (incPos (len' - len))
setStartCode in_field_layout
return (L pos (Indent len')) }
@nbspspacetab* { \pos len inp -> do
len' <- checkLeadingWhitespace pos len inp
if B.length inp == len
then return (L pos EOF)
else do
-- Small hack: if char and byte length mismatch
-- subtract the difference, so lexToken will count position correctly.
-- Proper (and slower) fix is to count utf8 length in lexToken
when (len' /= len) $ adjustPos (incPos (len' - len))
setStartCode in_field_layout
return (L pos (Indent len')) }
}

<in_field_layout> {
$spacetab+;
$field_layout' $field_layout* { toki TokFieldLine }
@nl { \_ _ _ -> adjustPos retPos >> setStartCode bol_field_layout >> lexToken }
$spacetab+ { toki Whitespace }
$field_layout' $field_layout* { toki TokFieldLine }
@nl { \pos len inp -> do
adjustPos retPos
setStartCode bol_field_layout
lexToken }
}

<bol_field_braces> {
() { \_ _ _ -> setStartCode in_field_braces >> lexToken }
() { \_ _ _ -> setStartCode in_field_braces >> lexToken }
}

<in_field_braces> {
$spacetab+;
$spacetab+ { toki Whitespace }
$field_braces' $field_braces* { toki TokFieldLine }
\{ { tok OpenBrace }
\} { tok CloseBrace }
@nl { \_ _ _ -> adjustPos retPos >> setStartCode bol_field_braces >> lexToken }
\{ { tok OpenBrace }
\} { tok CloseBrace }
@nl { \pos len inp -> do
adjustPos retPos
setStartCode bol_field_braces
lexToken }
}

{

-- | Tokens of outer cabal file structure. Field values are treated opaquely.
data Token = TokSym !ByteString -- ^ Haskell-like identifier, number or operator
| TokStr !ByteString -- ^ String in quotes
| TokOther !ByteString -- ^ Operators and parens
| Indent !Int -- ^ Indentation token
data Token = TokSym !ByteString -- ^ Haskell-like identifier, number or operator
| TokStr !ByteString -- ^ String in quotes
| TokOther !ByteString -- ^ Operators and parens
| Indent !Int -- ^ Indentation token
| TokFieldLine !ByteString -- ^ Lines after @:@
| Colon
| OpenBrace
| CloseBrace
| Whitespace !ByteString
| Comment !ByteString
| EOF
| LexicalError InputStream --TODO: add separate string lexical error
deriving Show
Expand Down Expand Up @@ -230,7 +247,6 @@ lexToken = do
setInput inp'
let !len_bytes = B.length inp - B.length inp'
t <- action pos len_bytes inp
--traceShow t $ return tok
return t


Expand Down Expand Up @@ -259,11 +275,29 @@ lexAll = do
_ -> do ts <- lexAll
return (t : ts)

-- FIXME: for debugging
lexAll' :: Lex [(Int, LToken)]
lexAll' = do
t <- lexToken
c <- getStartCode
case t of
L _ EOF -> return [(c, t)]
_ -> do ts <- lexAll'
return ((c, t) : ts)

ltest :: Int -> String -> Prelude.IO ()
ltest code s =
let (ws, xs) = execLexer (setStartCode code >> lexAll) (B.Char8.pack s)
in traverse_ print ws >> traverse_ print xs

lexString :: String -> ([LexWarning], [LToken])
lexString = execLexer lexAll . B.Char8.pack

lexByteString :: ByteString -> ([LexWarning], [LToken])
lexByteString = execLexer lexAll

lexByteString' :: ByteString -> ([LexWarning], [(Int, LToken)])
lexByteString' = execLexer lexAll'

mkLexState :: ByteString -> LexState
mkLexState input = LexState
Expand Down
Loading