You can use a recursive sub-query factoring clause without any regular expressions and which correctly handles nested braces (such as (abc; (def;fff); zzyzx)):
WITH char_groups (
id, value, idx, len, grp, grp_start, is_end, brace_level
) AS (
SELECT id,
value,
DECODE( SUBSTR( value, 1, 1 ), ';', 0, 1 ),
LENGTH( value ),
1,
DECODE( SUBSTR( value, 1, 1 ), ';', 0, 1 ),
CASE
WHEN ';' IN ( SUBSTR( value, 1, 1 ), SUBSTR( value, 2, 1 ) )
OR value IS NULL
THEN 1
ELSE 0
END,
DECODE( SUBSTR( value, 1, 1 ), '(', 1, 0 )
FROM table_name
UNION ALL
SELECT id,
value,
idx + 1,
len,
CASE
WHEN brace_level = 0 AND SUBSTR( value, idx + 1, 1 ) = ';'
THEN grp + 1
ELSE grp
END,
CASE
WHEN brace_level = 0 AND SUBSTR( value, idx + 1, 1 ) = ';'
THEN idx + 1
ELSE grp_start
END,
CASE
WHEN (
(
( brace_level = 0 AND SUBSTR( value, idx + 1, 1 ) <> '(' )
OR
( brace_level = 1 AND SUBSTR( value, idx + 1, 1 ) = ')' )
)
AND
SUBSTR( value, idx + 2, 1 ) = ';'
)
OR idx + 1 = len
THEN 1
ELSE 0
END,
GREATEST(
brace_level + DECODE( SUBSTR( value, idx + 1, 1 ), '(', 1, ')', -1, 0 ),
0
)
FROM char_groups
WHERE idx < len
)
SELECT id,
grp,
LTRIM( SUBSTR( value, grp_start, idx - grp_start + 1 ), ';' ) AS term
FROM char_groups cg
WHERE is_end = 1
ORDER BY id, grp
Which, for the sample data:
CREATE TABLE table_name ( id, value ) AS
SELECT 1, '1111 = 2222; 3333 in 4444; 5555 sum (6666; 7777)' FROM DUAL UNION ALL
SELECT 2, 'ABCD' FROM DUAL UNION ALL
SELECT 3, 'A;;C;D;;F(F;F;F);' FROM DUAL UNION ALL
SELECT 4, NULL FROM DUAL UNION ALL
SELECT 5, 'abc; (e;f) = ( e; f)' FROM DUAL UNION ALL
SELECT 6, '(abc; (def;fff); zzyzx)' FROM DUAL UNION ALL
SELECT 7, '(((abc)));(def));((ghi;jkl);mno);pqr' FROM DUAL UNION ALL
SELECT 8, ';' FROM DUAL UNION ALL
SELECT 9, ';A;' FROM DUAL UNION ALL
SELECT 10, '()(;);(;)(;)' FROM DUAL;
Outputs (including NULL values for when you have two adjacent ; delimiters):
ID | GRP | TERM
-: | --: | :----------------------
1 | 1 | 1111 = 2222
1 | 2 | 3333 in 4444
1 | 3 | 5555 sum (6666; 7777)
2 | 1 | ABCD
3 | 1 | A
3 | 2 | null
3 | 3 | C
3 | 4 | D
3 | 5 | null
3 | 6 | F(F;F;F)
3 | 7 | null
4 | 1 | null
5 | 1 | abc
5 | 2 | (e;f) = ( e; f)
6 | 1 | (abc; (def;fff); zzyzx)
7 | 1 | (((abc)))
7 | 2 | (def))
7 | 3 | ((ghi;jkl);mno)
7 | 4 | pqr
8 | 1 | null
8 | 2 | null
9 | 1 | null
9 | 2 | A
9 | 3 | null
10 | 1 | ()(;)
10 | 2 | (;)(;)
db<>fiddle here
Previous Version:
Will work if you have single, balanced parentheses:
WITH terms ( id, value, idx, term, spos, epos ) AS (
SELECT id,
value,
1,
REGEXP_SUBSTR( value, '(([^;(]|\(.*?\))*)(;|$)', 1, 1, NULL, 1 ),
1,
REGEXP_INSTR( value, '(([^;(]|\(.*?\))*)(;|$)', 1, 1, 1 )
FROM table_name
UNION ALL
SELECT id,
value,
idx + 1,
REGEXP_SUBSTR( value, '(([^;(]|\(.*?\))*)(;|$)', epos, 1, NULL, 1 ),
epos,
REGEXP_INSTR( value, '(([^;(]|\(.*?\))*)(;|$)', epos, 1, 1 )
FROM terms
WHERE epos > 0
)
SELECT id, idx, term
FROM terms
WHERE SUBSTR( value, -1 ) = ';'
OR spos < LENGTH( value )
OR value IS NULL
ORDER BY id, idx;
Which outputs:
ID | IDX | TERM
-: | --: | :---------------------
1 | 1 | 1111 = 2222
1 | 2 | 3333 in 4444
1 | 3 | 5555 sum (6666; 7777)
2 | 1 | ABCD
3 | 1 | A
3 | 2 | null
3 | 3 | C
3 | 4 | D
3 | 5 | null
3 | 6 | F(F;F;F)
3 | 7 | null
4 | 1 | null
5 | 1 | abc
5 | 2 | (e;f) = ( e; f)
6 | 1 | (abc; (def;fff)
6 | 2 | zzyzx)
7 | 1 | (((abc)))
7 | 2 | (def))
7 | 3 | ((ghi;jkl)
7 | 4 | mno)
7 | 5 | pqr
8 | 1 | null
8 | 2 | null
9 | 1 | null
9 | 2 | A
9 | 3 | null
10 | 1 | ()(;)
10 | 2 | (;)(;)
(Note: it doesn't work for examples 6 & 7 where there are multiple nested parentheses but does work or the simpler test cases.)
db<>fiddle here
Update
I only wish replace ";" trough with " ; " except ";" in parenthesis.
You can use:
WITH char_groups (
id, value, idx, len, grp, brace_level
) AS (
SELECT id,
value,
CASE SUBSTR( value, 1, 1 ) WHEN ';' THEN 0 ELSE 1 END,
LENGTH( value ),
1,
CASE SUBSTR( value, 1, 1 ) WHEN '(' THEN 1 ELSE 0 END
FROM table_name
UNION ALL
SELECT id,
CASE
WHEN brace_level = 0 AND SUBSTR( value, idx + 1, 1 ) = ';'
THEN SUBSTR( value, 1, idx ) || ' ; ' || SUBSTR( value, idx + 2 )
ELSE value
END,
CASE
WHEN brace_level = 0 AND SUBSTR( value, idx + 1, 1 ) = ';'
THEN idx + 3
ELSE idx + 1
END,
CASE
WHEN brace_level = 0 AND SUBSTR( value, idx + 1, 1 ) = ';'
THEN len + 2
ELSE len
END,
CASE
WHEN brace_level = 0 AND SUBSTR( value, idx + 1, 1 ) = ';'
THEN grp + 1
ELSE grp
END,
GREATEST(
brace_level
+ CASE SUBSTR( value, idx + 1, 1 )
WHEN '(' THEN 1
WHEN ')' THEN -1
ELSE 0
END,
0
)
FROM char_groups
WHERE idx < len
)
SELECT id,
value
FROM char_groups
WHERE idx = len OR value IS NULL
ORDER BY id, idx
And, for the sample data:
CREATE TABLE table_name ( id, value ) AS
SELECT 1, '1111 = 2222; 3333 in 4444; 5555 sum (6666; 7777)' FROM DUAL UNION ALL
SELECT 2, 'ABCD' FROM DUAL UNION ALL
SELECT 3, 'A;;C;D;;F(F;F;F);' FROM DUAL UNION ALL
SELECT 4, NULL FROM DUAL UNION ALL
SELECT 5, 'abc; (e;f) = ( e; f)' FROM DUAL UNION ALL
SELECT 6, '(abc; (def;fff); zzyzx)' FROM DUAL UNION ALL
SELECT 7, '(((abc)));(def));((ghi;jkl);mno);pqr' FROM DUAL UNION ALL
SELECT 8, ';' FROM DUAL UNION ALL
SELECT 9, ';A;' FROM DUAL UNION ALL
SELECT 10, '()(;);(;)(;)' FROM DUAL UNION ALL
SELECT 11, '(a ; b);(c ; d)' FROM DUAL;
This outputs:
ID | VALUE
-: | :---------------------------------------------------
1 | 1111 = 2222 ; 3333 in 4444 ; 5555 sum (6666; 7777)
2 | ABCD
3 | A ; ; C ; D ; ; F(F;F;F) ;
4 | null
5 | abc ; (e;f) = ( e; f)
6 | (abc; (def;fff); zzyzx)
7 | (((abc))) ; (def)) ; ((ghi;jkl);mno) ; pqr
8 | ;
9 | ; A ;
10 | ()(;) ; (;)(;)
11 | (a ; b) ; (c ; d)
Or, for only single nested braces, you can use:
SELECT id,
RTRIM( REGEXP_REPLACE( value, '(([^;(]|\(.*?\))*)(;|$)', '\1 \3 ' ) )
|| CASE SUBSTR( value, -1 ) WHEN ';' THEN ' ' END AS value
FROM table_name
Which outputs:
ID | VALUE
-: | :---------------------------------------------------
1 | 1111 = 2222 ; 3333 in 4444 ; 5555 sum (6666; 7777)
2 | ABCD
3 | A ; ; C ; D ; ; F(F;F;F) ;
4 | null
5 | abc ; (e;f) = ( e; f)
6 | (abc; (def;fff) ; zzyzx)
7 | (((abc))) ; (def)) ; ((ghi;jkl) ; mno) ; pqr
8 | ;
9 | ; A ;
10 | ()(;) ; (;)(;)
11 | (a ; b) ; (c ; d)
db<>fiddle here
I already have a function to split after.
Please check your split function against case #11.
[^]to match the braces.