@@ -907,10 +907,13 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
907907 in the wide format, to be stripped from the names in the long format.
908908 For example, if your column names are A-suffix1, A-suffix2, you
909909 can strip the hyphen by specifying `sep`='-'
910- suffix : str default '\d+'
910+ suffix : str, default '\d+'
911911 A regular expression capturing the wanted suffixes. '\d+' captures
912912 numeric suffixes. Suffixes with no numbers could be specified with the
913- negated character class '\D+'.
913+ negated character class '\D+'. You can also further disambiguate
914+ suffixes, for example, if your wide variables are of the form
915+ Aone, Btwo,.., and you have an unrelated column Arating, you can
916+ ignore the last one by specifying `suffix`='(!?one|two)'
914917
915918 Returns
916919 -------
@@ -1048,16 +1051,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
10481051 in a typical case.
10491052 """
10501053 def get_var_names (df , stub , sep , suffix ):
1051- # The first part of this regex is needed to avoid multiple "greedy"
1052- # matches with stubs that have overlapping substrings. For example
1053- # A2011, A2012 are separate from AA2011, AA2012. And BBone, BBtwo is
1054- # different from Bone, Btwo, and BBBrating
1055- # The last part lets us disambiguate suffixes. For example, with
1056- # stubname A: (A2011, A2012) would be captured while Arating would
1057- # be ignored by the numeric class \d+
1058- regex = "^{0}(?!{1}){2}{3}" .format (
1059- re .escape (stub ), re .escape (stub [- 1 ]), re .escape (sep ), suffix )
1060-
1054+ regex = "^{0}{1}{2}" .format (re .escape (stub ), re .escape (sep ), suffix )
10611055 return df .filter (regex = regex ).columns .tolist ()
10621056
10631057 def melt_stub (df , stub , i , j , value_vars , sep ):
0 commit comments