commit aea788adf3d45d34ee6bcd5fed3778446c2311a8 Author: bgme <i@bgme.me> Date: Thu Jan 19 17:14:23 2023 +0800 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..902a101 --- /dev/null +++ b/.gitignore @@ -0,0 +1,173 @@ +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
# NOTE: in the collapsed patch this span started with the tail of the commit's
# .gitignore hunk followed by the diff header for main.py. Both are preserved
# verbatim below (as comments) so that no content of the patch is lost.
#
# +#.idea/
# +
# +### Python Patch ###
# +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
# +poetry.toml
# +
# +
# +# End of https://www.toptal.com/developers/gitignore/api/python
# +
# +/.idea
# diff --git a/main.py b/main.py
# new file mode 100644
# index 0000000..10b35f3
# --- /dev/null
# +++ b/main.py
# @@ -0,0 +1,177 @@
"""Convert gfwlist into dnsmasq ``server=`` rules and restart dnsmasq.

The script downloads the base64-encoded gfwlist, extracts the plain domain
names from its AutoProxy-style rules, writes one
``server=/<domain>/<dns_ip>#<dns_port>`` line per domain to
``DNSMASQ_RULES_FILE`` and finally restarts dnsmasq through its init script.
"""
import base64
import logging
import re
import subprocess
import sys
from urllib.request import urlopen

PROXY_DNS_IP = '127.0.0.1'
PROXY_DNS_PORT = '5353'

DNSMASQ_RULES_FILE = '/tmp/dnsmasq.d/gfwlist'

# https://github.com/gfwlist/gfwlist
GFWLIST_URL_LIST = [
    "https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt",
    "https://pagure.io/gfwlist/raw/master/f/gfwlist.txt",
    "https://gitlab.com/gfwlist/gfwlist/raw/master/gfwlist.txt",
    "https://git.tuxfamily.org/gfwlist/gfwlist.git/plain/gfwlist.txt",
    "http://repo.or.cz/gfwlist.git/blob_plain/HEAD:/gfwlist.txt"
]

# The patterns below are compiled once at import time instead of once per
# processed line.

# https://stackoverflow.com/questions/5284147/validating-ipv4-addresses-with-regexp
_IPV4_RE = re.compile(r'((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.?\b){4}')

# https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses
_IPV6_RE = re.compile(
    r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|'
    r'([0-9a-fA-F]{1,4}:){1,7}:|'
    r'([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|'
    r'([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|'
    r'([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|'
    r'([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|'
    r'([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|'
    r'[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|'
    r':((:[0-9a-fA-F]{1,4}){1,7}|:)|'
    r'fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|'
    r'::(ffff(:0{1,4}){0,1}:){0,1}'
    r'((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}'
    r'(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|'
    r'([0-9a-fA-F]{1,4}:){1,4}:'
    r'((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}'
    r'(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))'
)

# Leading label that contains a wildcard, e.g. "*." / "cdn*." / "*2."
_ASTERISK_LABEL_RE = re.compile(r'^[\w\-_]*\*[\w\-_]*\.')
_SCHEME_RE = re.compile(r'^http(s)?://')
_PERCENT_ENCODED_RE = re.compile(r'%\w\w')


def get_gfwlist_text() -> str:
    """Download gfwlist from the first reachable mirror and decode it.

    The mirrors in ``GFWLIST_URL_LIST`` are tried in order; the payload is
    base64-decoded and then decoded as UTF-8.

    Returns:
        The decoded gfwlist text.

    Raises:
        IOError: if no mirror yields a decodable list.
    """
    for url in GFWLIST_URL_LIST:
        try:
            logging.info('request %s', url)
            with urlopen(url, timeout=15) as response:
                return base64.b64decode(response.read()).decode('utf-8')
        except Exception as err:
            # Best-effort fallback: any network or decoding failure just moves
            # on to the next mirror. ``Exception`` (rather than a bare
            # ``except``) keeps Ctrl-C and SystemExit working, and the failure
            # reason is logged instead of being silently discarded.
            logging.warning('failed to fetch %s: %s', url, err)
    raise IOError("can't download gfwlist")


def is_comment(line: str) -> bool:
    """Return True for ``!`` comment lines and the ``[AutoProxy`` header."""
    return line.startswith(('!', '[AutoProxy'))


def has_ip(line: str) -> bool:
    """Return True if an IPv4 or IPv6 address pattern occurs anywhere in *line*."""
    return bool(_IPV4_RE.search(line)) or bool(_IPV6_RE.search(line))


def is_exception(line: str) -> bool:
    """Return True for exception rules, i.e. lines starting with ``@@``."""
    return line.startswith('@@')


def is_regular(line: str) -> bool:
    """Return True for regular-expression rules, i.e. lines starting with ``/``."""
    return line.startswith('/')


def gfwlist_line_filter(line: str) -> bool:
    """Return True if *line* is a rule worth converting to a host name.

    Blank lines, comments, rules containing an IP address, exception rules
    and regular-expression rules are rejected.
    """
    line = line.strip()
    return (line != '') and (not is_comment(line)) and (not has_ip(line)) \
        and (not is_exception(line)) and (not is_regular(line))


def gfwlist_line_converter(line: str) -> str:
    """Convert one gfwlist rule into a bare domain name.

    Args:
        line: a single rule that already passed ``gfwlist_line_filter``.

    Returns:
        The domain name, or ``""`` when the rule cannot be expressed as a
        plain domain (it has a path, query parameters, percent-encoded
        characters, a wildcard in the middle, or no dot at all).
    """
    raw_line = line

    line = line.strip()
    line = re.sub(r'/$', '', line)

    def invalid_rule_return() -> str:
        logging.debug('invalid rule: %s', raw_line)
        return ""

    def convert_asterisk(line: str) -> str:
        # Drop a leading wildcard label such as "*." or "cdn*.".
        # The pattern is anchored, so this is a no-op when it does not apply.
        line = _ASTERISK_LABEL_RE.sub("", line)
        # Reject rules that still contain "*" somewhere in the middle.
        if '*' in line:
            return invalid_rule_return()
        return line

    # ||global.bing.com
    # ||cdn*.i-scmp.com
    if line.startswith('||'):
        line = line.replace('||', "")
        # Reject rules with a path, like the other branches do: a "/" inside
        # the host would split the generated dnsmasq rule into extra domains.
        if '/' in line:
            return invalid_rule_return()
        return convert_asterisk(line)

    # |http://www.dmm.com/netgame
    # |http://bbs.cantonese.asia/
    # |http://www.dmm.com/netgame
    # |http://*.1mobile.tw
    # |http://*2.bahamut.com.tw
    if line.startswith('|'):
        line = line.replace('|', '')
        line = _SCHEME_RE.sub('', line)
        # Reject rules that contain a path.
        if '/' in line:
            return invalid_rule_return()
        return convert_asterisk(line)

    # .casinobellini.com
    # share.dmhy.org
    # .ddns.net/
    # bbs.sina.com%2F
    # .amazon.com/Dalai-Lama
    # amazon.com/Prisoner-State-Secret-Journal-Premier
    # .keepandshare.com/visit/visit_page.php?i=688154
    # .pentoy.hk/%E6%99%82%E4%BA%8B
    # .ruanyifeng.com/blog*some_ways_to_break_the_great_firewall
    # prisoner-state-secret-journal-premier
    # q%3Dfreedom
    # search*safeweb
    # q=triangle
    # ultrareach

    # Reject rules that are not domain names.
    if '.' not in line:
        return invalid_rule_return()

    # Strip the http(s) scheme.
    line = _SCHEME_RE.sub('', line)

    # Reject rules that contain a path or query parameters.
    for m in ['/', '?', '=']:
        if m in line:
            return invalid_rule_return()
    # Reject rules with percent-encoded (non-ASCII) characters. ``search`` is
    # required here: the sequence may appear anywhere, e.g. "bbs.sina.com%2F".
    if _PERCENT_ENCODED_RE.search(line):
        return invalid_rule_return()

    line = convert_asterisk(line)

    # Strip the leading "." of the domain.
    if line.startswith('.'):
        line = line[1:]

    return line


def get_gfwlist_hosts() -> set[str]:
    """Download gfwlist and return the set of domain names it contains."""
    hosts = {
        gfwlist_line_converter(line)
        for line in get_gfwlist_text().splitlines()
        if gfwlist_line_filter(line)
    }
    # The converter returns "" for rules it cannot express as a domain.
    hosts.discard("")
    return hosts


def get_dnsmasq_text() -> str:
    """Return the dnsmasq configuration text, one ``server=`` rule per host.

    Hosts are emitted in sorted order and joined with newlines (no trailing
    newline).
    """
    return '\n'.join(
        "server=/{host}/{dns_ip}#{dns_port}".format(
            host=host, dns_ip=PROXY_DNS_IP, dns_port=PROXY_DNS_PORT)
        for host in sorted(get_gfwlist_hosts())
    )


def main():
    """Write the generated rules to ``DNSMASQ_RULES_FILE`` and restart dnsmasq."""
    dnsmasq_text = get_dnsmasq_text()
    # Explicit encoding so the result does not depend on the system locale.
    with open(DNSMASQ_RULES_FILE, 'w', encoding='utf-8') as f:
        f.write(dnsmasq_text)
    result = subprocess.run(["/etc/init.d/dnsmasq", "restart"])
    if result.returncode != 0:
        logging.error('dnsmasq restart exited with status %s', result.returncode)


if __name__ == '__main__':
    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="%(levelname)s:%(message)s")

    main()