import base64
import logging
import re
import subprocess
import sys
from urllib.request import urlopen

# IP part of every generated dnsmasq ``server=/host/ip#port`` rule.
PROXY_DNS_IP = '127.0.0.1'
# Port part of every generated rule; kept as a string because it is only
# interpolated into the rule text.
PROXY_DNS_PORT = '5353'

# Path the generated dnsmasq rules are written to by main().
DNSMASQ_RULES_FILE = '/tmp/dnsmasq.d/gfwlist'

# Download locations for gfwlist.txt, tried in order until one succeeds.
# Project home: https://github.com/gfwlist/gfwlist
GFWLIST_URL_LIST = [
    "https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt",
    "https://pagure.io/gfwlist/raw/master/f/gfwlist.txt",
    "https://gitlab.com/gfwlist/gfwlist/raw/master/gfwlist.txt",
    "https://git.tuxfamily.org/gfwlist/gfwlist.git/plain/gfwlist.txt",
    "http://repo.or.cz/gfwlist.git/blob_plain/HEAD:/gfwlist.txt"
]


def get_gfwlist_text() -> str:
    """Download gfwlist and return its decoded text.

    The URLs in ``GFWLIST_URL_LIST`` are tried in order. The first response
    body that downloads, base64-decodes and UTF-8-decodes successfully is
    returned; a failing URL is logged and the next one is tried.

    Returns:
        The decoded gfwlist content.

    Raises:
        IOError: If none of the URLs yielded usable content. The last
            underlying error, if any, is attached as the cause.
    """
    last_error = None
    for url in GFWLIST_URL_LIST:
        logging.info('request {url}'.format(url=url))
        try:
            with urlopen(url, timeout=15) as response:
                return base64.b64decode(response.read()).decode('utf-8')
        except Exception as exc:
            # Deliberate best-effort fallback: any network, HTTP or decoding
            # problem just moves on to the next mirror. ``Exception`` (not a
            # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
            logging.warning('failed to fetch %s: %s', url, exc)
            last_error = exc
    raise IOError("can't download gfwlist") from last_error


def is_comment(line: str) -> bool:
    """Return True if *line* is a gfwlist comment or the AutoProxy header.

    A line qualifies when it starts with ``!`` or with ``[AutoProxy``.
    Both prefixes are anchored at the very beginning of the line.
    """
    return line.startswith(('!', '[AutoProxy'))


def has_ip(line: str) -> bool:
    """Return True if an IPv4 or IPv6 address pattern occurs anywhere in *line*.

    The IPv4 pattern is checked first; the IPv6 pattern is only consulted
    when no IPv4 match is found.
    """
    # Patterns taken from:
    # https://stackoverflow.com/questions/5284147/validating-ipv4-addresses-with-regexp
    # https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses
    ipv4_pattern = r'((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.?\b){4}'
    ipv6_pattern = r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))'

    if re.search(ipv4_pattern, line) is not None:
        return True
    return re.search(ipv6_pattern, line) is not None


def is_exception(line: str) -> bool:
    """Return True if *line* is an exception (whitelist) rule, i.e. starts with ``@@``."""
    return line.startswith('@@')


def is_regular(line: str) -> bool:
    """Return True if *line* is a regular-expression rule, i.e. starts with ``/``."""
    return line.startswith('/')


def gfwlist_line_filter(line: str) -> bool:
    """Decide whether a gfwlist line should be passed on for conversion.

    The line is stripped of surrounding whitespace first. It is kept only
    when it is non-empty and is not a comment, does not contain an IP
    address, is not an exception rule and is not a regex rule.
    """
    rule = line.strip()
    if rule == '':
        return False
    rejected = (
        is_comment(rule)
        or has_ip(rule)
        or is_exception(rule)
        or is_regular(rule)
    )
    return not rejected


def gfwlist_line_converter(line: str) -> str:
    """Convert one gfwlist rule into a bare domain name.

    Three rule shapes are handled:

    * ``||host`` -- the ``||`` markers are removed.
    * ``|http(s)://host`` -- the ``|`` markers and the scheme are removed;
      rules that still contain a path are rejected.
    * plain ``host`` / ``.host`` -- rejected unless they contain a dot and
      no path, query string or percent-encoded characters; one leading dot
      is removed.

    In every shape a leading wildcard label such as ``*.`` or ``cdn*.`` is
    dropped, and a rule that still contains ``*`` afterwards is rejected.

    Args:
        line: A raw gfwlist rule line.

    Returns:
        The domain name, or ``""`` when the rule cannot be reduced to one.
        Rejected rules are logged at DEBUG level.
    """
    raw_line = line

    line = line.strip()
    # Drop a trailing slash so "host/" is handled like "host".
    line = re.sub(r'/$', '', line)

    def invalid_rule_return() -> str:
        logging.debug('invalid rule: ' + raw_line)
        return ""

    def convert_asterisk(line: str) -> str:
        asterisk_re = re.compile(r'^[\w\-_]*\*[\w\-_]*\.')
        # Drop a leading wildcard label such as "*." or "cdn*.".
        if re.match(asterisk_re, line):
            line = asterisk_re.sub("", line)
        # Reject rules that still contain "*" elsewhere.
        if '*' in line:
            return invalid_rule_return()
        return line

    # ||global.bing.com
    # ||cdn*.i-scmp.com
    if line.startswith('||'):
        line = line.replace('||', "")
        return convert_asterisk(line)

    # |http://www.dmm.com/netgame
    # |http://bbs.cantonese.asia/
    # |http://www.dmm.com/netgame
    # |http://*.1mobile.tw
    # |http://*2.bahamut.com.tw
    if line.startswith('|'):
        line = line.replace('|', '')
        line = re.sub(r'^http(s)?://', '', line)
        # Reject rules that contain a path.
        if '/' in line:
            return invalid_rule_return()
        return convert_asterisk(line)

    # .casinobellini.com
    # share.dmhy.org
    # .ddns.net/
    # bbs.sina.com%2F
    # .amazon.com/Dalai-Lama
    # amazon.com/Prisoner-State-Secret-Journal-Premier
    # .keepandshare.com/visit/visit_page.php?i=688154
    # .pentoy.hk/%E6%99%82%E4%BA%8B
    # .ruanyifeng.com/blog*some_ways_to_break_the_great_firewall
    # prisoner-state-secret-journal-premier
    # q%3Dfreedom
    # search*safeweb
    # q=triangle
    # ultrareach

    # Reject rules that are not domain names (no dot at all).
    if '.' not in line:
        return invalid_rule_return()

    # Strip the http/https scheme.
    line = re.sub(r'^http(s)?://', '', line)

    # Reject rules that contain a path or query parameters.
    for m in ['/', '?', '=']:
        if m in line:
            return invalid_rule_return()
    # Reject rules containing percent-encoded characters. ``re.search`` is
    # required here: the escape may appear anywhere (e.g. "bbs.sina.com%2F"),
    # whereas ``re.match`` would only detect it at the start of the rule.
    if re.search(r'%\w\w', line):
        return invalid_rule_return()

    line = convert_asterisk(line)

    # Remove the single leading "." of the domain.
    if line.startswith('.'):
        line = line[1:]

    return line


def get_gfwlist_hosts() -> set[str]:
    """Return the set of host names extracted from the downloaded gfwlist.

    Each line of the gfwlist text that passes ``gfwlist_line_filter`` is run
    through ``gfwlist_line_converter``; empty conversion results are dropped.
    """
    rules = get_gfwlist_text().splitlines()
    converted = (
        gfwlist_line_converter(rule)
        for rule in rules
        if gfwlist_line_filter(rule)
    )
    return {host for host in converted if host != ""}


def get_dnsmasq_text() -> str:
    """Build the dnsmasq rules text for all gfwlist hosts.

    One ``server=/host/ip#port`` line is produced per host, using
    ``PROXY_DNS_IP`` and ``PROXY_DNS_PORT``. Hosts are sorted, and lines are
    joined with newlines without a trailing newline.
    """
    template = "server=/{host}/{dns_ip}#{dns_port}"
    rules = [
        template.format(host=name, dns_ip=PROXY_DNS_IP, dns_port=PROXY_DNS_PORT)
        for name in sorted(get_gfwlist_hosts())
    ]
    return '\n'.join(rules)


def main():
    """Regenerate the dnsmasq rules file and restart dnsmasq.

    The rules text is written to ``DNSMASQ_RULES_FILE``, then
    ``/etc/init.d/dnsmasq restart`` is run. A non-zero exit status from the
    restart is logged as an error rather than raised.
    """
    dnsmasq_text = get_dnsmasq_text()
    # Explicit encoding so the output does not depend on the system locale.
    with open(DNSMASQ_RULES_FILE, 'w', encoding='utf-8') as f:
        f.write(dnsmasq_text)
    result = subprocess.run(["/etc/init.d/dnsmasq", "restart"])
    if result.returncode != 0:
        # Surface a failed restart instead of ignoring it silently.
        logging.error('dnsmasq restart failed with exit code %d', result.returncode)


if __name__ == '__main__':
    # When run as a script, log INFO and above to stderr as "LEVEL:message".
    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="%(levelname)s:%(message)s")

    main()