Skip to main content
removed a line of code that I forgot to remove (print statement that was there for testing only)
Source Link
jp207
  • 173
  • 1
  • 7
#Program that detects dates in text and copies and prints them

import pyperclip, re
#DD/MM/YEAR format
dateRegex = re.compile(r'(\d\d)/(\d\d)/(\d\d\d\d)')
#text = str(pyperclip.paste())
text = 'Hello. Your birthday is on 29/02/1990. His birthday is on 40/09/1992 and her birthday is on 09/09/2000.'

matches = []
for groups in dateRegex.findall(text):
    day = groups[0]
    month = groups[1]
    year = groups[2]
    
    #convert to int for comparisons
    dayNum = int(day)
    monthNum = int(month)
    yearNum = int(year)
    
    #check if date and month values are valid
    if dayNum <= 31 and monthNum > 0 and monthNum <= 12:
        #months with 30 days
        if month in ('04', '06', '09', '11'):
            if not (dayNum > 0 and dayNum <= 30):
                print("Invalid date input")
                continue
        #February only
        if month == '02':
            #February doesn't have more than 29 days
            if dayNum > 29:
                continue
            if yearNum % 4 == 0:
                #leap years have 29 days in February
                if yearNum % 100 == 0 and yearNum % 400 != 0:
                    #not a leap year even if divisible by 4
                    if dayNum > 28:
                        continue
            else:
                if dayNum > 28:
                    continue
        #all other months have up to 31 days
        if month not in ('02', '04', '06', '09', '11'):
            if dayNum <= 0 and dayNum > 31:
                continue
    else:
        continue
    date = '/'.join([groups[0],groups[1],groups[2]])
    matches.append(date)

if len(matches) > 0:
    pyperclip.copy('\n'.join(matches))
    print('Copied to clipboard:')
    print('\n'.join(matches))
else:
    print('No dates found.')
#Program that detects dates in text and copies and prints them

import pyperclip, re
#DD/MM/YEAR format
dateRegex = re.compile(r'(\d\d)/(\d\d)/(\d\d\d\d)')
#text = str(pyperclip.paste())
text = 'Hello. Your birthday is on 29/02/1990. His birthday is on 40/09/1992 and her birthday is on 09/09/2000.'

matches = []
for groups in dateRegex.findall(text):
    day = groups[0]
    month = groups[1]
    year = groups[2]
    
    #convert to int for comparisons
    dayNum = int(day)
    monthNum = int(month)
    yearNum = int(year)
    
    #check if date and month values are valid
    if dayNum <= 31 and monthNum > 0 and monthNum <= 12:
        #months with 30 days
        if month in ('04', '06', '09', '11'):
            if not (dayNum > 0 and dayNum <= 30):
                print("Invalid date input")
                continue
        #February only
        if month == '02':
            #February doesn't have more than 29 days
            if dayNum > 29:
                continue
            if yearNum % 4 == 0:
                #leap years have 29 days in February
                if yearNum % 100 == 0 and yearNum % 400 != 0:
                    #not a leap year even if divisible by 4
                    if dayNum > 28:
                        continue
            else:
                if dayNum > 28:
                    continue
        #all other months have up to 31 days
        if month not in ('02', '04', '06', '09', '11'):
            if dayNum <= 0 and dayNum > 31:
                continue
    else:
        continue
    date = '/'.join([groups[0],groups[1],groups[2]])
    matches.append(date)

if len(matches) > 0:
    pyperclip.copy('\n'.join(matches))
    print('Copied to clipboard:')
    print('\n'.join(matches))
else:
    print('No dates found.')
#Program that detects dates in text and copies and prints them

import pyperclip, re
#DD/MM/YEAR format
dateRegex = re.compile(r'(\d\d)/(\d\d)/(\d\d\d\d)')
#text = str(pyperclip.paste())
text = 'Hello. Your birthday is on 29/02/1990. His birthday is on 40/09/1992 and her birthday is on 09/09/2000.'

matches = []
for groups in dateRegex.findall(text):
    day = groups[0]
    month = groups[1]
    year = groups[2]
    
    #convert to int for comparisons
    dayNum = int(day)
    monthNum = int(month)
    yearNum = int(year)
    
    #check if date and month values are valid
    if dayNum <= 31 and monthNum > 0 and monthNum <= 12:
        #months with 30 days
        if month in ('04', '06', '09', '11'):
            if not (dayNum > 0 and dayNum <= 30):
                continue
        #February only
        if month == '02':
            #February doesn't have more than 29 days
            if dayNum > 29:
                continue
            if yearNum % 4 == 0:
                #leap years have 29 days in February
                if yearNum % 100 == 0 and yearNum % 400 != 0:
                    #not a leap year even if divisible by 4
                    if dayNum > 28:
                        continue
            else:
                if dayNum > 28:
                    continue
        #all other months have up to 31 days
        if month not in ('02', '04', '06', '09', '11'):
            if dayNum <= 0 and dayNum > 31:
                continue
    else:
        continue
    date = '/'.join([groups[0],groups[1],groups[2]])
    matches.append(date)

if len(matches) > 0:
    pyperclip.copy('\n'.join(matches))
    print('Copied to clipboard:')
    print('\n'.join(matches))
else:
    print('No dates found.')
Tweeted twitter.com/StackCodeReview/status/1381532557146267648
Became Hot Network Question
Source Link
jp207
  • 173
  • 1
  • 7

Date Detection Regex in Python

I worked on a problem from Automate the Boring Stuff Chapter 7:

Write a regular expression that can detect dates in the DD/MM/YYYY format. Assume that the days range from 01 to 31, the months range from 01 to 12, and the years range from 1000 to 2999. Note that if the day or month is a single digit, it’ll have a leading zero.

The regular expression doesn’t have to detect correct days for each month or for leap years; it will accept nonexistent dates like 31/02/2020 or 31/04/2021. Then store these strings into variables named month, day, and year, and write additional code that can detect if it is a valid date. April, June, September, and November have 30 days, February has 28 days, and the rest of the months have 31 days. February has 29 days in leap years. Leap years are every year evenly divisible by 4, except for years evenly divisible by 100, unless the year is also evenly divisible by 400. Note how this calculation makes it impossible to make a reasonably sized regular expression that can detect a valid date.

Here's my code:

#Program that detects dates in text and copies and prints them

import pyperclip, re
#DD/MM/YEAR format
dateRegex = re.compile(r'(\d\d)/(\d\d)/(\d\d\d\d)')
#text = str(pyperclip.paste())
text = 'Hello. Your birthday is on 29/02/1990. His birthday is on 40/09/1992 and her birthday is on 09/09/2000.'

matches = []
for groups in dateRegex.findall(text):
    day = groups[0]
    month = groups[1]
    year = groups[2]
    
    #convert to int for comparisons
    dayNum = int(day)
    monthNum = int(month)
    yearNum = int(year)
    
    #check if date and month values are valid
    if dayNum <= 31 and monthNum > 0 and monthNum <= 12:
        #months with 30 days
        if month in ('04', '06', '09', '11'):
            if not (dayNum > 0 and dayNum <= 30):
                print("Invalid date input")
                continue
        #February only
        if month == '02':
            #February doesn't have more than 29 days
            if dayNum > 29:
                continue
            if yearNum % 4 == 0:
                #leap years have 29 days in February
                if yearNum % 100 == 0 and yearNum % 400 != 0:
                    #not a leap year even if divisible by 4
                    if dayNum > 28:
                        continue
            else:
                if dayNum > 28:
                    continue
        #all other months have up to 31 days
        if month not in ('02', '04', '06', '09', '11'):
            if dayNum <= 0 and dayNum > 31:
                continue
    else:
        continue
    date = '/'.join([groups[0],groups[1],groups[2]])
    matches.append(date)

if len(matches) > 0:
    pyperclip.copy('\n'.join(matches))
    print('Copied to clipboard:')
    print('\n'.join(matches))
else:
    print('No dates found.')

I've tested it out with various different date strings and it works as far as I can tell. I wanted to know about better ways of doing this though. As a beginner and an amateur, I understand there might be methods of writing the above code that are better and I don't mind being guided in the right direction and learning more about them. What is a better way of doing all of the above without using so many if statements?