from urllib.parse import urlparse

from googlesearch import search

# old Japanese free-hosting domains to enumerate, one at a time
urls = [
    # "55street.net",
    # "easter.ne.jp",
    # "finito-web.com",
    # "ojiji.net",
    # "zero-yen.com",
    "fc2web.com",
    # "k-free.net",
    # "gooside.com",
    # "ktplan.net",
    # "kt.fc2.com",
    # "zero-city.com",
    # "k-server.org",
    # "land.to"
]

# ask Google for every indexed subdomain of the chosen host
results = search(
    f"site:*.{urls[0]}",
    num_results=10000,
    unique=True,
    safe=None,
    sleep_interval=5,
    region="jp",  # ISO country code for Japan ("ja" is a language code)
)

# reduce each hit to its scheme://host/ root
parsed_urls = []
for s in results:
    parsed_url = urlparse(s)
    domain = f"{parsed_url.scheme}://{parsed_url.netloc}/"
    parsed_urls.append(domain)
    print(domain, flush=True)

unique_urls = list(set(parsed_urls))

# write one root URL per line for the wget pass below
filename = f"{urls[0]}.txt"
with open(filename, 'w') as file:
    for url in unique_urls:
        file.write(url + '\n')

print(f"saved {filename}")
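The script only queries the first uncommented host per run. To sweep the whole list in one pass and feed the wget step directly, a loop along these lines would work (a sketch: collect_domains is a hypothetical refactoring of the search-and-parse logic above into a function, and list.txt matches the filename the wget command below expects):

from urllib.parse import urlparse

from googlesearch import search


def collect_domains(host: str) -> set[str]:
    # hypothetical wrapper around the search-and-parse logic above
    results = search(f"site:*.{host}", num_results=10000, unique=True,
                     safe=None, sleep_interval=5, region="jp")
    return {f"{u.scheme}://{u.netloc}/" for u in map(urlparse, results)}


hosts = ["fc2web.com", "55street.net", "easter.ne.jp"]  # ...and the rest
all_domains = set()
for host in hosts:
    all_domains |= collect_domains(host)

# single merged input file for the wget spider pass below
with open("list.txt", "w") as f:
    f.writelines(url + "\n" for url in sorted(all_domains))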
# spider the collected list: one request per URL, no downloads, no redirect
# following, append all output to wget.log
# (list.txt: presumably the fc2web.com.txt written by the script above)
wget --spider --max-redirect=0 -a wget.log --tries=1 --wait=0.4 -i list.txt
--2025-04-04 21:32:16--  http://toukei135.fc2web.com/
Resolving toukei135.fc2web.com (toukei135.fc2web.com)... 199.48.208.133
Connecting to toukei135.fc2web.com (toukei135.fc2web.com)|199.48.208.133|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: http://error.fc2.com/web/ [following]
0 redirections exceeded.
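The 302 pointing at http://error.fc2.com/web/ is the signature of a deleted FC2 account, and because --max-redirect=0 stops wget from following it, each log entry ends right after the status and Location lines. A minimal sketch for splitting wget.log into live and dead hosts, assuming every entry follows the format of the excerpt above:

import re

alive, dead = set(), set()
host = None

with open("wget.log", encoding="utf-8", errors="replace") as log:
    for line in log:
        # each entry starts with "--YYYY-MM-DD HH:MM:SS--  <url>"
        m = re.match(r"--\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}--\s+(\S+)", line)
        if m:
            host = m.group(1)
        elif host and "Location: http://error.fc2.com/web/" in line:
            dead.add(host)   # redirected to FC2's error page: account is gone
        elif host and "200 OK" in line:
            alive.add(host)  # the spider got a real response
        # hosts that fail DNS or redirect elsewhere land in neither set

print(f"{len(alive)} alive, {len(dead)} dead")
with open("alive.txt", "w") as f:
    f.writelines(u + "\n" for u in sorted(alive))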