I want to break lines of code (adding #') when they make part of chunk (line begins with #' (#\x27)), when exceeding 100 cols.
My solution does not work for several chunks:
Example file:
#' chunk line
#' big chunk line to split big chunk line to split big chunk line to split big chunk line to split big chunk line to split
#' ruler90123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
#'
not chunk line do nothing
big do nothing line big do nothing line big do nothing line big do nothing line big do nothing line big do nothing line big do nothing line
#' chunk line
#' big chunk line to split big chunk line to split big chunk line to split big chunk line to split big chunk line to split
#' ruler90123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
#'
not chunk line do nothing
big do nothing line big do nothing line big do nothing line big do nothing line big do nothing line big do nothing line big do nothing line
My try: (works if only one chunk present)
perl -0777 -pe '
s{#\x27.*#\x27}{ q{ gets lines from #\x27 to #\x27 (chunk) };
($r = $&) =~ s/\n!\n#\x27//g; q{ removes breaks except followed by #\x27 };
$r =~ s/\G.{0,100}(\s|.$)\K/\n#\x27 /g; q{ before column 100 adds break + #\x27 };
$r =~ s/#\x27 #\x27/#\x27/g; q{ removes duplicated #\x27 };
$r =~ s/\n\n/\n/g; q{ removes duplicated breaks };
$r
}gse' < chunks.txt
Expected output: (two times this)
#' chunk line
#' big chunk line to split big chunk line to split big chunk line to split big chunk line to split
#' big chunk line to split
#' ruler90123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
#'
not chunk line do nothing
big do nothing line big do nothing line big do nothing line big do nothing line big do nothing line big do nothing line big do nothing line
Workaround in R
psum <- function(...,na.rm=FALSE) {
rowSums(do.call(cbind,list(...)),na.rm=na.rm)
}
gblines<-readLines("chunks.txt")
newgblines<-character()
i<-1
j<-1
repeat {
newgblines[j] <- gblines[i]
if (grepl("^#\'",newgblines[j] ) & nchar( newgblines[j] ) > 100 ) { # select lines with more than 100 and beginning in #'
repeat{
greps<-gregexpr(pattern ="\\s",newgblines[j])[[1]] # get position of spaces
lenG<-length(greps)
sums<-psum(-greps , rep(100,lenG ) ) # calculate which space is closest to col. 100
index <- which(sums>0)
minSums<- min(sums[index])
index2<-which(sums==minSums) # index of space in greps
cutpoint<-greps[index2]
nchar2<-nchar(newgblines[j]) # number of chars. in line
strFirst <-substr(newgblines[j],1,cutpoint) # cut before col. 100
strSecond<-substr(newgblines[j],cutpoint+1,nchar2) # segmente after col. 100
newgblines[j]<-strFirst
j<-j+1
newgblines[j]<-paste0("#\' ",strSecond)
if (nchar(strSecond)<=100 ){
break
}
} #
} # if
i <- i+1
j <- j+1
if (i>length(gblines) ){
break
}
}
newgblines