Mathias answer is pretty good. I wrote one myself but he was faster :) Anyway here is my solution. As mentioned above precompiling the regex patterns for performance improvements is the way to go here. I would also include a pattern for the 1858.0 case which improves overall readability i think. Since python 3.6 f-stringsPython 3.6 f-strings are available and make reading code so much easier - you should start using them.
import re
PATTERNS = [
r'(\d+)\.\d*', # 1858.0
r'(\d{1,2})-(\d{1,2})-(\d{4})$', # 1-12-1963
r'(\d{4})-(\d{1,2})-(\d{1,2})$', # 1789-7-14
r'(\d{4})-(\d{1,2})$', # 1945-2
r'(\d{1,2})-(\d{4})$' # 2-1883
]
PATTERNS = [re.compile(p) for p in PATTERNS]
def cleanup(date):
if type(date) not str:
date = str(date)
for i, p in enumerate(PATTERNS):
res = p.match(date)
if res and i == 0:
return f'{res[1]}'
if res and i == 1:
return f'{res[3]}-{res[2] :0>2}-{res[1] :0>2}'
if res and i == 2:
return f'{res[1]}-{res[2] :0>2}-{res[3] :0>2}'
if res and i == 3:
return f'{res[1]}-{res[2] :0>2}'
if res and i == 4:
return f'{res[2]}-{res[1] :0>2}'
def main():
dates = 1858.0, '1-12-1963', '1945-2', '7-2018', '1789-7-14',
for date in dates:
print(f'in: {date} out: {cleanup(date)}')
if __name__ == "__main__":
main()