Files
clients/WebBasedCrawler/rankcheckin.py

41 lines
1.1 KiB
Python

#!/usr/bin/python3
import sys
import os.path
# Number of leading entries compared when no usable rank argument is given.
DEFAULT_RANK = 1000


def extract_url(line):
    """Return the part of *line* from the first 'http' onward, or None.

    Newline characters are removed from the result. Lines that do not
    contain 'http' (blank lines, headers, ...) yield None so callers can
    skip them instead of crashing with ValueError.
    """
    start = line.find('http')
    if start < 0:
        return None
    return line[start:].replace('\n', '')


def load_urls(path):
    """Read *path* and return the URLs found on its lines, in file order."""
    with open(path) as f:
        return [url for url in map(extract_url, f) if url is not None]


def parse_rank(argv):
    """Return the rank cut-off from *argv*, or DEFAULT_RANK if absent/invalid.

    The optional fourth element is used only when it consists solely of
    decimal digits; str.isdecimal() is used rather than str.isnumeric()
    because the latter accepts characters (e.g. superscripts, fractions)
    that int() cannot convert.
    """
    if len(argv) == 4 and argv[3].isdecimal():
        return int(argv[3])
    return DEFAULT_RANK


def count_common(rank1, rank2, rank):
    """Count entries of *rank1* that also occur in *rank2*.

    When both lists hold at least *rank* entries, only the first *rank*
    entries of each are compared; otherwise the complete lists are
    compared. Every occurrence in *rank1* is counted, so duplicates in
    *rank1* contribute more than once.
    """
    if rank <= len(rank1) and rank <= len(rank2):
        rank1 = rank1[:rank]
        rank2 = rank2[:rank]
    # One set gives O(1) membership tests instead of rescanning a list
    # slice for every URL.
    candidates = set(rank2)
    return sum(1 for url in rank1 if url in candidates)


def main(argv):
    """Run the comparison for command line *argv*; return the exit status.

    Prints the number of common URLs on success (status 0). Prints a usage
    or file-check message and returns 1 when the arguments are unusable.
    """
    if len(argv) not in (3, 4):
        print("Usage : python {0} file1 file2 [ranknum]".format(argv[0]))
        return 1
    if not os.path.isfile(argv[1]) or not os.path.isfile(argv[2]):
        print("check files :\nfile1 : {0}\nfile2 : {1}".format(argv[1], argv[2]))
        return 1
    rank = parse_rank(argv)
    print(count_common(load_urls(argv[1]), load_urls(argv[2]), rank))
    return 0


if "__main__" == __name__:
    # sys.exit is always available, unlike the site-provided exit() helper.
    sys.exit(main(sys.argv))