@@ -27,6 +27,8 @@ module analysis::diff::edits::HiFiLayoutDiff
2727extend analysis ::diff ::edits ::HiFiTreeDiff ;
2828import ParseTree ; // this should not be necessary because imported by HiFiTreeDiff
2929import String ; // this should not be necessary because imported by HiFiTreeDiff
30+ import lang ::rascal ::grammar ::definition ::Characters ;
31+ import IO ;
3032
3133@synopsis {Normalization choices for case-insensitive literals.}
3234data CaseInsensitivity
@@ -56,7 +58,7 @@ list[TextEdit] layoutDiff(Tree original, Tree formatted, bool recoverComments =
5658 list [TextEdit ] rec (
5759 t :appl (prod (Symbol tS , _, _), list [Tree ] tArgs ), // layout is not necessarily parsed with the same rules (i.e. comments are lost!)
5860 u :appl (prod (Symbol uS , _, _), list [Tree ] uArgs ))
59- = [replace (t @\loc , recoverComments ? learnComments (t , u ) : "<u > " ) | tArgs != uArgs , "<t > " != " < u > " /* avoid useless edits */ ]
61+ = [replace (t @\loc , repl ) | tArgs != uArgs , str repl := ( recoverComments ? learnComments (t , u ) : "<u > " ), repl != "<t > " /* do not edit anything if nothing has changed */ ]
6062 when
6163 delabel (tS ) is layouts ,
6264 delabel (uS ) is layouts ,
@@ -106,22 +108,31 @@ list[TextEdit] layoutDiff(Tree original, Tree formatted, bool recoverComments =
106108 default list [TextEdit ] rec (
107109 Tree t :appl (Production p , list [Tree ] argsA ),
108110 appl (p /* must be the same by the above assert */ , list [Tree ] argsB ))
109- = [*rec (a , b ) | < a , b > <- zip2 (argsA , argsB ) ];
111+ = [*rec (argsA [ i ], argsB [ i ] ) | i <- [ 0 .. size (argsA )] ];
110112
111113 // first add required locations to layout nodes
112- original = reposition (original , markLit =true , markLayout =true , markSubLayout =true );
114+ // TODO: check if indeed repositioning is never needed
115+ // original = reposition(original, markLit=true, markLayout=true, markSubLayout=true);
113116
114117 return rec (original , formatted );
115118}
116119
120+ private Symbol newlineClass = \char -class ([range (10 ,10 )]);
121+
117122@synopsis {Make sure the new layout still contains all the source code comments of the original layout}
118123@description {
119- This algorithm uses the @category("Comments") tag to detect source code comments inside layout substrings. If the original
124+ This algorithm uses the `@category(/^[Cc]omment$/)` tag to detect source code comments inside layout substrings. If the original
120125layout contains comments, we re-introduce the comments at the expected level of indentation. New comments present in the
121126replacement are kept and will overwrite any original comments.
122127
123- This trick is complicated by the syntax of multiline comments and single line comments that have
124- to end with a newline.
128+ There are corner cases with respect to the original comments:
129+ * the single line comment that does not end with a newline itself, yet it must always end with a newline after it.
130+ * multiple single line comments after each other
131+
132+ Then there are corner cases with respect to the replacement whitespace:
133+ * the last line of the replacement whitespace is special. This is the indentation to use for all comments.
134+ * but there could be no newlines in the replacement whitespace; and still there is a single line comment to be included.
135+ Now we need to infer an indentation level for what follows the comment from "thin air".
125136}
126137@benefits {
127138* if comments are kept and formatted by tools like Tree2Box, then this algorithm does not overwrite these.
@@ -132,7 +143,14 @@ to end with a newline.
132143* if comments are not marked with `@category("Comment")` in the original grammar, then this algorithm recovers nothing.
133144}
134145private str learnComments (Tree original , Tree replacement ) {
135- originalComments = ["<c > " | /c:appl(prod(_,_,{\tag("category"(/ ^[Cc ]omment $/)), *_}), _) := original];
146+ bool mustEndWithNewline (lit ("\n " )) = true ;
147+ bool mustEndWithNewline (conditional (Symbol s , _)) = mustEndWithNewline (s );
148+ // if a comment can not contain newline characters, but everything else, then it must be followed by one:
149+ bool mustEndWithNewline (\it er(Symbol cc :\char -class (_))) = intersection (cc , newlineClass ) != newlineClass ;
150+ bool mustEndWithNewline (\it er-star (Symbol cc :\char -class (_))) = intersection (cc , newlineClass ) != newlineClass ;
151+ default bool mustEndWithNewline (_) = false ;
152+
153+ originalComments = [<s , s [-1 ] == "\n " || mustEndWithNewline (lastSym )> | /c:appl(prod(_,[*_,Symbol lastSym],{\tag("category"(/ ^[Cc ]omment $/)), *_}), _) := original, str s := "<c > "];
136154
137155 if (originalComments == []) {
138156 // if the original did not contain comments, stick with the replacements
@@ -146,23 +164,42 @@ private str learnComments(Tree original, Tree replacement) {
146164 return "<replacement > " ;
147165 }
148166
149- // At this point, we know that: (a) comments are not present in the replacement and (b) they used to be there in the original.
150- // So the old comments are going to be the new output. however, we want to learn indentation from the replacement.
167+ // At this point, we know that:
168+ // (a) comments are not present in the replacement and
169+ // (b) they used to be there in the original.
170+ // So the old comments are going to be copied to the new output.
171+ // But, we want to indent them using the style of the replacement.
172+
173+ // The last line of the replacement string typically has the indentation for the construct that follows:
174+ // | // a comment
175+ // | if (true) {
176+ // ^^^^
177+ // newIndent
178+ //
179+ // However, if the replacement string is on a single line, then we don't have the indentation
180+ // for the string on the next line readily available. In this case we indent the next line
181+ // to the start column of the replacement layout, as a proxy.
182+
183+ str replString = "<replacement > " ;
184+ str newIndent = split ("\n " , replString )[-1 ] ? "" ;
151185
152- // Drop the last newline of single-line comments, because we don't want two newlines in the output for every comment:
153- str dropEndNl (str line :/^.*\n$/ ) = (line [..-1 ]);
154- default str dropEndNl (str line ) = line ;
186+ if (/\n/ !:= replString ) {
187+ // no newline in the repl string, so no indentation available for what follows the comment...
188+ newIndent = "<for (_ <- [0 ..replacement @\loc .begin .column ]) {> <}> " ;
189+ }
155190
156- // the first line of the replacement ,is the indentation to use.
157- str replString = "<replacement > " ;
158- str replacementIndent = /^\n+$/ !:= replString
159- ? split ("\n " , replString )[0 ]
160- : "" ;
161-
162- // trimming each line makes sure we forget about the original indentation, and drop accidental spaces after comment lines
163- return replString + indent (replacementIndent ,
164- "<for (c <- originalComments , str line <- split ("\n " , dropEndNl (c ))) {><indent (replacementIndent , trim (line ), indentFirstLine =true )>
165- '<}> " [..-1 ], indentFirstLine =false ) + replString ;
191+ // we always place sequential comments vertically, because we don't know if we are dealing
192+ // with a single line comment that has to end with newline by follow restriction or by a literal "\n".
193+ // TODO: a deeper analysis of the comment rule that's in use could also be used to discover this.
194+ str trimmedOriginals = "<for (<c , newLine > <- originalComments ) {><trim (c )><if (newLine ) {>
195+ '<}><}> " ;
196+
197+ // we wrap the comment with the formatted whitespace to assure the proper indentation
198+ // of its first line, and the proper indentation of what comes after this layout node
199+ return replString
200+ + indent (newIndent , trimmedOriginals , indentFirstLine =false )
201+ + newIndent
202+ ;
166203}
167204
168205private Symbol delabel (label (_, Symbol t )) = t ;
0 commit comments