add hosts_deduplicate function
This commit is contained in:
parent
aea788adf3
commit
183e073927
37
main.py
37
main.py
|
@ -42,7 +42,7 @@ def has_ip(line: str) -> bool:
|
|||
ipv4_re = re.compile(r'((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.?\b){4}')
|
||||
ipv6_re = re.compile(
|
||||
r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))')
|
||||
return bool(ipv4_re.findall(line)) or bool(ipv6_re.findall(line))
|
||||
return bool(ipv4_re.search(line)) or bool(ipv6_re.search(line))
|
||||
|
||||
|
||||
def is_exception(line: str) -> bool:
|
||||
|
@ -127,7 +127,7 @@ def gfwlist_line_converter(line: str) -> str:
|
|||
if m in line:
|
||||
return invalid_rule_return()
|
||||
# Reject rules that contain percent-encoded (non-ASCII) characters
|
||||
if re.match(r'%\w\w', line):
|
||||
if re.search(r'%\w\w', line):
|
||||
return invalid_rule_return()
|
||||
|
||||
line = convert_asterisk(line)
|
||||
|
@ -139,10 +139,34 @@ def gfwlist_line_converter(line: str) -> str:
|
|||
return line
|
||||
|
||||
|
||||
def get_gfwlist_hosts() -> set[str]:
|
||||
def is_valid_hostname(domain: str) -> bool:
    """Return True when *domain* is a syntactically valid hostname.

    A valid hostname is 1-255 characters long and consists of dot-separated
    labels. Each label is 1-63 characters, uses only ASCII letters, digits
    and hyphens, and neither starts nor ends with a hyphen. A single
    trailing dot (fully-qualified form) is accepted.

    Args:
        domain: Candidate hostname to check.

    Returns:
        True if the whole string matches the hostname grammar, else False.
    """
    # https://stackoverflow.com/questions/1418423/the-hostname-regex
    # The pattern is anchored with \Z instead of $: in Python, $ also matches
    # just before a trailing newline, which would let "example.com\n" pass.
    domain_re = re.compile(
        r'^(?=.{1,255}\Z)[0-9A-Za-z](?:(?:[0-9A-Za-z]|-){0,61}[0-9A-Za-z])?(?:\.[0-9A-Za-z](?:(?:[0-9A-Za-z]|-){0,61}[0-9A-Za-z])?)*\.?\Z')
    return bool(domain_re.match(domain))
|
||||
|
||||
|
||||
def hosts_deduplicate(hosts: list[str]) -> list[str]:
    """Drop invalid hostnames and hosts already covered by a parent domain.

    Exact duplicates are collapsed first. Every entry rejected by
    ``is_valid_hostname`` is logged as a warning and discarded. A remaining
    host is then dropped when any of its parent domains is also present
    (e.g. ``a.example.com`` is dropped when ``example.com`` is in the list).

    Args:
        hosts: Hostnames to clean up. The list itself is not modified.

    Returns:
        A new list of the surviving hostnames, in no particular order.
    """
    valid_hosts = set()
    for host in set(hosts):
        if is_valid_hostname(host):
            valid_hosts.add(host)
        else:
            logging.warning('{host} is invalid!'.format(host=host))

    result = []
    for host in valid_hosts:
        # Walk up the parent domains ("a.b.com" -> "b.com" -> "com") and look
        # each one up in the set. Unlike removing from a list inside a
        # pairwise scan, this cannot try to remove the same host twice when
        # several of its ancestors are present, and it avoids the O(n^2)
        # comparison of every pair.
        covering_parent = None
        parent = host
        while '.' in parent:
            parent = parent.partition('.')[2]
            if parent in valid_hosts:
                covering_parent = parent
                break

        if covering_parent is None:
            result.append(host)
        else:
            logging.debug('found duplicate: {k} {v}'.format(k=host, v=covering_parent))

    return result
|
||||
|
||||
|
||||
def get_gfwlist_hosts() -> list[str]:
|
||||
gfwlist_text = get_gfwlist_text()
|
||||
gfwlist_lines = gfwlist_text.splitlines()
|
||||
return set(
|
||||
gfwlist_hosts = list(
|
||||
filter(
|
||||
lambda line: line != "",
|
||||
map(
|
||||
|
@ -151,6 +175,9 @@ def get_gfwlist_hosts() -> set[str]:
|
|||
)
|
||||
)
|
||||
)
|
||||
gfwlist_hosts = hosts_deduplicate(gfwlist_hosts)
|
||||
logging.info('found {num} gfwlist host'.format(num=len(gfwlist_hosts)))
|
||||
return sorted(gfwlist_hosts)
|
||||
|
||||
|
||||
def get_dnsmasq_text() -> str:
|
||||
|
@ -158,7 +185,7 @@ def get_dnsmasq_text() -> str:
|
|||
map(
|
||||
lambda host: "server=/{host}/{dns_ip}#{dns_port}".format(host=host, dns_ip=PROXY_DNS_IP,
|
||||
dns_port=PROXY_DNS_PORT),
|
||||
sorted(list(get_gfwlist_hosts()))
|
||||
get_gfwlist_hosts()
|
||||
)
|
||||
)
|
||||
return '\n'.join(rule_list)
|
||||
|
|
Loading…
Reference in a new issue