From ddd164d026d24fd5a2ca3e3d0be06bbcaebb2476 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Thu, 8 Apr 2021 17:04:16 +0200 Subject: userdiff style: re-order drivers in alphabetical order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address some old code smell and move around the built-in userdiff drivers so they're both in alphabetical order, and now in the same order they appear in the gitattributes(5) documentation. The two started drifting in be58e70dba (diff: unify external diff and funcname parsing code, 2008-10-05), and then even further in 80c49c3de2 (color-words: make regex configurable via attributes, 2009-01-17) when the "cpp" pattern was added. There are no functional changes here; as --color-moved will show, only existing lines are moved. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- userdiff.c | 76 +++++++++++++++++++++++++++++++------------------------------- 1 file changed, 38 insertions(+), 38 deletions(-) (limited to 'userdiff.c') diff --git a/userdiff.c b/userdiff.c index 3f81a2261c..650f421d63 100644 --- a/userdiff.c +++ b/userdiff.c @@ -44,6 +44,44 @@ PATTERNS("bash", /* -- */ /* Characters not in the default $IFS value */ "[^ \t]+"), +PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", + "[={}\"]|[^={}\" \t]+"), +PATTERNS("cpp", + /* Jump targets or access declarations */ + "!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:[[:space:]]*($|/[/*])\n" + /* functions/methods, variables, and compounds at top level */ + "^((::[[:space:]]*)?[A-Za-z_].*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lLuU]*" + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*"), +PATTERNS("csharp", + /* Keywords */ + "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" + /* Methods and constructors */ + "^[ 
\t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe|async)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" + /* Properties */ + "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" + /* Type definitions */ + "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct)[ \t]+.*)$\n" + /* Namespace */ + "^[ \t]*(namespace[ \t]+.*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"), +IPATTERN("css", + "![:;][[:space:]]*$\n" + "^[:[@.#]?[_a-z0-9].*$", + /* -- */ + /* + * This regex comes from W3C CSS specs. Should theoretically also + * allow ISO 10646 characters U+00A0 and higher, + * but they are not handled in this regex. + */ + "-?[_a-zA-Z][-_a-zA-Z0-9]*" /* identifiers */ + "|-?[0-9]+|\\#[0-9a-fA-F]+" /* numbers */ +), PATTERNS("dts", "!;\n" "!=\n" @@ -191,46 +229,8 @@ PATTERNS("rust", "[a-zA-Z_][a-zA-Z0-9_]*" "|[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?" 
"|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::"), -PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", - "[={}\"]|[^={}\" \t]+"), PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+"), -PATTERNS("cpp", - /* Jump targets or access declarations */ - "!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:[[:space:]]*($|/[/*])\n" - /* functions/methods, variables, and compounds at top level */ - "^((::[[:space:]]*)?[A-Za-z_].*)$", - /* -- */ - "[a-zA-Z_][a-zA-Z0-9_]*" - "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lLuU]*" - "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*"), -PATTERNS("csharp", - /* Keywords */ - "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" - /* Methods and constructors */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe|async)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" - /* Properties */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" - /* Type definitions */ - "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct)[ \t]+.*)$\n" - /* Namespace */ - "^[ \t]*(namespace[ \t]+.*)$", - /* -- */ - "[a-zA-Z_][a-zA-Z0-9_]*" - "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" - "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"), -IPATTERN("css", - "![:;][[:space:]]*$\n" - "^[:[@.#]?[_a-z0-9].*$", - /* -- */ - /* - * This regex comes from W3C CSS specs. Should theoretically also - * allow ISO 10646 characters U+00A0 and higher, - * but they are not handled in this regex. 
- */ - "-?[_a-zA-Z][-_a-zA-Z0-9]*" /* identifiers */ - "|-?[0-9]+|\\#[0-9a-fA-F]+" /* numbers */ -), { "default", NULL, -1, { NULL, 0 } }, }; #undef PATTERNS -- cgit 1.2.3-korg From 6d1c9c527e5d2f4a00827ee2eac29709a1aff1ee Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Thu, 8 Apr 2021 17:04:17 +0200 Subject: userdiff style: declare patterns with consistent style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change those patterns which were declared with a regex on the same line as the "PATTERNS()" line to put that regex on the next line, and add missing "/* -- */" separator comments between the pattern and word_regex. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- userdiff.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'userdiff.c') diff --git a/userdiff.c b/userdiff.c index 650f421d63..33b0ce4020 100644 --- a/userdiff.c +++ b/userdiff.c @@ -44,7 +44,9 @@ PATTERNS("bash", /* -- */ /* Characters not in the default $IFS value */ "[^ \t]+"), -PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", +PATTERNS("bibtex", + "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", + /* -- */ "[={}\"]|[^={}\" \t]+"), PATTERNS("cpp", /* Jump targets or access declarations */ @@ -121,7 +123,9 @@ IPATTERN("fortran", * they would have been matched above as a variable anyway. */ "|[-+]?[0-9.]+([AaIiDdEeFfLlTtXx][Ss]?[-+]?[0-9.]*)?(_[a-zA-Z0-9][a-zA-Z0-9_]*)?" "|//|\\*\\*|::|[/<>=]="), -IPATTERN("fountain", "^((\\.[^.]|(int|ext|est|int\\.?/ext|i/e)[. ]).*)$", +IPATTERN("fountain", + "^((\\.[^.]|(int|ext|est|int\\.?/ext|i/e)[. ]).*)$", + /* -- */ "[^ \t-]+"), PATTERNS("golang", /* Functions */ @@ -132,7 +136,9 @@ PATTERNS("golang", "[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.eE]+i?|0[xX]?[0-9a-fA-F]+i?" 
"|[-+*/<>%&^|=!:]=|--|\\+\\+|<<=?|>>=?|&\\^=?|&&|\\|\\||<-|\\.{3}"), -PATTERNS("html", "^[ \t]*(<[Hh][1-6]([ \t].*)?>.*)$", +PATTERNS("html", + "^[ \t]*(<[Hh][1-6]([ \t].*)?>.*)$", + /* -- */ "[^<>= \t]+"), PATTERNS("java", "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n" @@ -144,6 +150,7 @@ PATTERNS("java", "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|"), PATTERNS("markdown", "^ {0,3}#{1,6}[ \t].*", + /* -- */ "[^<>= \t]+"), PATTERNS("matlab", /* @@ -152,6 +159,7 @@ PATTERNS("matlab", * that is understood by both. */ "^[[:space:]]*((classdef|function)[[:space:]].*)$|^(%%%?|##)[[:space:]].*$", + /* -- */ "[a-zA-Z_][a-zA-Z0-9_]*|[-+0-9.e]+|[=~<>]=|\\.[*/\\^']|\\|\\||&&"), PATTERNS("objc", /* Negate C statements that can look like functions */ @@ -212,13 +220,15 @@ PATTERNS("php", "[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" "|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->"), -PATTERNS("python", "^[ \t]*((class|(async[ \t]+)?def)[ \t].*)$", +PATTERNS("python", + "^[ \t]*((class|(async[ \t]+)?def)[ \t].*)$", /* -- */ "[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?" "|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?"), /* -- */ -PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", +PATTERNS("ruby", + "^[ \t]*((class|module|def)[ \t].*)$", /* -- */ "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." -- cgit 1.2.3-korg From 82512e008c82834df5a00ea79e1091da45b330b0 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Thu, 8 Apr 2021 17:04:18 +0200 Subject: userdiff style: normalize pascal regex declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Declare the pascal pattern consistently with how we declare the others, not having "\n" on one line by itself, but as part of the pattern, and when there are alternations have the "|" at the start, not end of the line. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- userdiff.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'userdiff.c') diff --git a/userdiff.c b/userdiff.c index 33b0ce4020..978ae64155 100644 --- a/userdiff.c +++ b/userdiff.c @@ -175,9 +175,8 @@ PATTERNS("objc", "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"), PATTERNS("pascal", - "^(((class[ \t]+)?(procedure|function)|constructor|destructor|interface|" - "implementation|initialization|finalization)[ \t]*.*)$" - "\n" + "^(((class[ \t]+)?(procedure|function)|constructor|destructor|interface" + "|implementation|initialization|finalization)[ \t]*.*)$\n" "^(.*=[ \t]*(class|record).*)$", /* -- */ "[a-zA-Z_][a-zA-Z0-9_]*" -- cgit 1.2.3-korg From f12fa9ee6c87efa8a926973bd203ef327686fb62 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Thu, 8 Apr 2021 17:04:19 +0200 Subject: userdiff: add and use for_each_userdiff_driver() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor the userdiff_find_by_namelen() function so that a new for_each_userdiff_driver() API function does most of the work. This will be useful for the same reason we've got other for_each_*() API functions as part of various APIs, and will be used in a follow-up commit. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- userdiff.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++----------- userdiff.h | 13 ++++++++++++ 2 files changed, 71 insertions(+), 12 deletions(-) (limited to 'userdiff.c') diff --git a/userdiff.c b/userdiff.c index 978ae64155..a667ccaa8c 100644 --- a/userdiff.c +++ b/userdiff.c @@ -259,20 +259,33 @@ static struct userdiff_driver driver_false = { { NULL, 0 } }; -static struct userdiff_driver *userdiff_find_by_namelen(const char *k, size_t len) +struct find_by_namelen_data { + const char *name; + size_t len; + struct userdiff_driver *driver; +}; + +static int userdiff_find_by_namelen_cb(struct userdiff_driver *driver, + enum userdiff_driver_type type, void *priv) { - int i; - for (i = 0; i < ndrivers; i++) { - struct userdiff_driver *drv = drivers + i; - if (!strncmp(drv->name, k, len) && !drv->name[len]) - return drv; - } - for (i = 0; i < ARRAY_SIZE(builtin_drivers); i++) { - struct userdiff_driver *drv = builtin_drivers + i; - if (!strncmp(drv->name, k, len) && !drv->name[len]) - return drv; + struct find_by_namelen_data *cb_data = priv; + + if (!strncmp(driver->name, cb_data->name, cb_data->len) && + !driver->name[cb_data->len]) { + cb_data->driver = driver; + return 1; /* tell the caller to stop iterating */ } - return NULL; + return 0; +} + +static struct userdiff_driver *userdiff_find_by_namelen(const char *name, size_t len) +{ + struct find_by_namelen_data udcbdata = { + .name = name, + .len = len, + }; + for_each_userdiff_driver(userdiff_find_by_namelen_cb, &udcbdata); + return udcbdata.driver; } static int parse_funcname(struct userdiff_funcname *f, const char *k, @@ -379,3 +392,36 @@ struct userdiff_driver *userdiff_get_textconv(struct repository *r, return driver; } + +static int for_each_userdiff_driver_list(each_userdiff_driver_fn fn, + enum userdiff_driver_type type, void *cb_data, + struct userdiff_driver *drv, + int drv_size) +{ + int i; + int ret; + for (i = 0; i < 
drv_size; i++) { + struct userdiff_driver *item = drv + i; + if ((ret = fn(item, type, cb_data))) + return ret; + } + return 0; +} + +int for_each_userdiff_driver(each_userdiff_driver_fn fn, void *cb_data) +{ + int ret; + + ret = for_each_userdiff_driver_list(fn, USERDIFF_DRIVER_TYPE_CUSTOM, + cb_data, drivers, ndrivers); + if (ret) + return ret; + + ret = for_each_userdiff_driver_list(fn, USERDIFF_DRIVER_TYPE_BUILTIN, + cb_data, builtin_drivers, + ARRAY_SIZE(builtin_drivers)); + if (ret) + return ret; + + return 0; +} diff --git a/userdiff.h b/userdiff.h index 203057e13e..aee91bc77e 100644 --- a/userdiff.h +++ b/userdiff.h @@ -21,6 +21,12 @@ struct userdiff_driver { struct notes_cache *textconv_cache; int textconv_want_cache; }; +enum userdiff_driver_type { + USERDIFF_DRIVER_TYPE_BUILTIN = 1<<0, + USERDIFF_DRIVER_TYPE_CUSTOM = 1<<1, +}; +typedef int (*each_userdiff_driver_fn)(struct userdiff_driver *, + enum userdiff_driver_type, void *); int userdiff_config(const char *k, const char *v); struct userdiff_driver *userdiff_find_by_name(const char *name); @@ -34,4 +40,11 @@ struct userdiff_driver *userdiff_find_by_path(struct index_state *istate, struct userdiff_driver *userdiff_get_textconv(struct repository *r, struct userdiff_driver *driver); +/* + * Iterate over all userdiff drivers. The userdiff_driver_type + * argument to each_userdiff_driver_fn indicates their type. Return + * non-zero to exit early from the loop. + */ +int for_each_userdiff_driver(each_userdiff_driver_fn, void *); + #endif /* USERDIFF */ -- cgit 1.2.3-korg