I have a list of keywords and I want to check whether any of them appears in a file containing more than 100,000 domain names. To speed this up, I want to use multiprocessing so that each keyword is checked in parallel.
My code doesn't seem to be working well: the single-process version is much faster than the multiprocessing one. What's wrong? :(
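For context, my understanding of Pool.map (from the multiprocessing docs) is that it applies a function to every item of an iterable, splitting the items across the pool's worker processes. A minimal sketch of what I mean:

from multiprocessing import Pool

def square(n):
    return n * n

if __name__ == '__main__':
    with Pool() as pool:
        # map() applies square() to every item of the list,
        # distributing the items across the worker processes
        print(pool.map(square, [1, 2, 3, 4, 5]))  # [1, 4, 9, 16, 25]

Here is my actual code: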
import time
from multiprocessing import Pool

def multiprocessing_func(keyword):
    # File containing more than 100k domain names
    # URL: https://raw.githubusercontent.com/CERT-MZ/projects/master/Domain-squatting/domain-names.txt
    file_domains = open("domain-names.txt", "r")
    for domain in file_domains:
        if keyword in domain:
            print("similar domain identified:", domain)
    # Rewind the file, start from the beginning
    file_domains.seek(0)

if __name__ == '__main__':
    starttime = time.time()
    # Keywords to check
    keywords = ["google", "facebook", "amazon", "microsoft", "netflix"]
    # Create a multiprocessing Pool
    pool = Pool()
    for keyword in keywords:
        print("Checking keyword:", keyword)
        # Without multiprocessing pool
        #multiprocessing_func(keyword)
        # With multiprocessing pool
        pool.map(multiprocessing_func, keyword)
    # Total run time
    print('That took {} seconds'.format(time.time() - starttime))
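For reference, the single-process run I'm comparing against is just the commented-out direct call above. A minimal sketch of that baseline, assuming the same domain-names.txt file (check_keyword here is just the scan from multiprocessing_func, without the Pool plumbing):

import time

def check_keyword(keyword):
    # Same scan as multiprocessing_func above; the file is opened per call
    with open("domain-names.txt", "r") as file_domains:
        for domain in file_domains:
            if keyword in domain:
                print("similar domain identified:", domain)

if __name__ == '__main__':
    starttime = time.time()
    keywords = ["google", "facebook", "amazon", "microsoft", "netflix"]
    for keyword in keywords:
        print("Checking keyword:", keyword)
        check_keyword(keyword)  # plain sequential call, no Pool
    print('That took {} seconds'.format(time.time() - starttime))

This sequential version is the one that finishes much faster for me.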