commit aea788adf3d45d34ee6bcd5fed3778446c2311a8
Author: bgme <i@bgme.me>
Date:   Thu Jan 19 17:14:23 2023 +0800

    init

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..902a101
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,173 @@
+# Created by https://www.toptal.com/developers/gitignore/api/python
+# Edit at https://www.toptal.com/developers/gitignore?templates=python
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+
+
+# End of https://www.toptal.com/developers/gitignore/api/python
+
+/.idea
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..10b35f3
--- /dev/null
+++ b/main.py
@@ -0,0 +1,177 @@
+import base64
+import logging
+import re
+import subprocess
+import sys
+from urllib.request import urlopen
+
+# DNS server address and port interpolated into every generated
+# "server=/<host>/<ip>#<port>" dnsmasq rule (see get_dnsmasq_text).
+PROXY_DNS_IP = '127.0.0.1'
+PROXY_DNS_PORT = '5353'
+
+# Path of the file that main() writes the generated rules to.
+DNSMASQ_RULES_FILE = '/tmp/dnsmasq.d/gfwlist'
+
+# Download locations of the gfwlist; get_gfwlist_text() tries them in order
+# and returns the first one that can be fetched and decoded.
+# https://github.com/gfwlist/gfwlist
+GFWLIST_URL_LIST = [
+    "https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt",
+    "https://pagure.io/gfwlist/raw/master/f/gfwlist.txt",
+    "https://gitlab.com/gfwlist/gfwlist/raw/master/gfwlist.txt",
+    "https://git.tuxfamily.org/gfwlist/gfwlist.git/plain/gfwlist.txt",
+    "http://repo.or.cz/gfwlist.git/blob_plain/HEAD:/gfwlist.txt"
+]
+
+
+def get_gfwlist_text() -> str:
+    """Download the gfwlist and return it as decoded text.
+
+    Each URL in GFWLIST_URL_LIST is tried in order. The first response that
+    can be fetched, base64-decoded and decoded as UTF-8 is returned.
+
+    Raises:
+        IOError: if none of the URLs yields a usable list.
+    """
+    for url in GFWLIST_URL_LIST:
+        logging.info('request {url}'.format(url=url))
+        try:
+            with urlopen(url, timeout=15) as response:
+                return base64.b64decode(response.read()).decode('utf-8')
+        except Exception as exc:
+            # Best effort: one failing mirror must not abort the run, but the
+            # reason is logged instead of being silently discarded. Catching
+            # Exception rather than using a bare except lets
+            # KeyboardInterrupt and SystemExit propagate.
+            logging.warning('failed to fetch %s: %s', url, exc)
+    raise IOError("can't download gfwlist")
+
+
+def is_comment(line: str) -> bool:
+    """Return True when *line* begins with ``!`` or with ``[AutoProxy``."""
+    comment_prefixes = ('!', '[AutoProxy')
+    return line.startswith(comment_prefixes)
+
+
+def has_ip(line: str) -> bool:
+    """Return True when an IPv4 or IPv6 address pattern occurs anywhere in *line*."""
+    # Patterns taken from:
+    # https://stackoverflow.com/questions/5284147/validating-ipv4-addresses-with-regexp
+    # https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses
+    address_patterns = (
+        r'((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.?\b){4}',
+        r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))',
+    )
+    # One hit anywhere in the line is enough; the IPv4 pattern is tried first.
+    return any(re.search(pattern, line) is not None for pattern in address_patterns)
+
+
+def is_exception(line: str) -> bool:
+    """Return True when *line* begins with the ``@@`` marker."""
+    return line.startswith('@@')
+
+
+def is_regular(line: str) -> bool:
+    """Return True when *line* begins with ``/``."""
+    return line.startswith('/')
+
+
+def gfwlist_line_filter(line: str) -> bool:
+    """Return True when *line* should be passed on for conversion.
+
+    After stripping surrounding whitespace, a line is rejected when it is
+    empty or when any of is_comment, has_ip, is_exception or is_regular
+    returns True for it.
+    """
+    stripped = line.strip()
+    if stripped == '':
+        return False
+    rejecting_checks = (is_comment, has_ip, is_exception, is_regular)
+    return not any(check(stripped) for check in rejecting_checks)
+
+
+def gfwlist_line_converter(line: str) -> str:
+    """Convert one gfwlist rule into a bare host name.
+
+    Surrounding whitespace and one trailing ``/`` are dropped first. Then:
+
+    * ``||host`` rules have the ``||`` marker removed;
+    * ``|http://host`` rules have ``|`` and the scheme removed and are
+      rejected when a path remains;
+    * any other rule must contain a dot and must not contain ``/``, ``?``,
+      ``=`` or a percent-escape; a leading ``.`` is removed.
+
+    In every case a leading label containing ``*`` (``*.`` or ``cdn*.``) is
+    stripped, and a rule that still contains ``*`` afterwards is rejected.
+
+    Returns:
+        The host name, or ``""`` when the rule is rejected (the original rule
+        is then logged at DEBUG level).
+    """
+    raw_line = line
+
+    line = line.strip()
+    line = line.removesuffix('/')
+
+    def invalid_rule_return():
+        logging.debug('invalid rule: ' + raw_line)
+        return ""
+
+    def convert_asterisk(line: str) -> str:
+        asterisk_re = re.compile(r'^[\w\-_]*\*[\w\-_]*\.')
+        # Strip a leading label that contains "*" (e.g. "*." or "cdn*.").
+        line = asterisk_re.sub("", line)
+        # Reject rules that still contain "*" somewhere else.
+        if '*' in line:
+            return invalid_rule_return()
+        return line
+
+    # ||global.bing.com
+    # ||cdn*.i-scmp.com
+    if line.startswith('||'):
+        line = line.replace('||', "")
+        return convert_asterisk(line)
+
+    # |http://www.dmm.com/netgame
+    # |http://bbs.cantonese.asia/
+    # |http://*.1mobile.tw
+    # |http://*2.bahamut.com.tw
+    if line.startswith('|'):
+        line = line.replace('|', '')
+        line = re.sub(r'^http(s)?://', '', line)
+        # Reject rules that contain a path.
+        if '/' in line:
+            return invalid_rule_return()
+        return convert_asterisk(line)
+
+    # .casinobellini.com
+    # share.dmhy.org
+    # .ddns.net/
+    # bbs.sina.com%2F
+    # .amazon.com/Dalai-Lama
+    # amazon.com/Prisoner-State-Secret-Journal-Premier
+    # .keepandshare.com/visit/visit_page.php?i=688154
+    # .pentoy.hk/%E6%99%82%E4%BA%8B
+    # .ruanyifeng.com/blog*some_ways_to_break_the_great_firewall
+    # prisoner-state-secret-journal-premier
+    # q%3Dfreedom
+    # search*safeweb
+    # q=triangle
+    # ultrareach
+
+    # Reject rules that are not domain names.
+    if '.' not in line:
+        return invalid_rule_return()
+
+    # Strip the http(s) scheme.
+    line = re.sub(r'^http(s)?://', '', line)
+
+    # Reject rules that contain a path or query parameters.
+    if any(marker in line for marker in ('/', '?', '=')):
+        return invalid_rule_return()
+    # Reject rules with percent-encoded characters. re.search is required
+    # here: re.match only looks at position 0 and would let through rules
+    # such as "bbs.sina.com%2F".
+    if re.search(r'%\w\w', line):
+        return invalid_rule_return()
+
+    line = convert_asterisk(line)
+
+    # Strip the leading dot of the domain.
+    return line.removeprefix('.')
+
+
+def get_gfwlist_hosts() -> set[str]:
+    """Download the gfwlist and return the set of host names derived from it.
+
+    Lines rejected by gfwlist_line_filter are skipped, and so are lines that
+    gfwlist_line_converter turns into the empty string.
+    """
+    hosts = set()
+    for rule in get_gfwlist_text().splitlines():
+        if not gfwlist_line_filter(rule):
+            continue
+        host = gfwlist_line_converter(rule)
+        if host != "":
+            hosts.add(host)
+    return hosts
+
+
+def get_dnsmasq_text() -> str:
+    """Render the gfwlist hosts as dnsmasq ``server=`` rules.
+
+    Returns one rule per host, sorted by host name and joined with newlines
+    (no trailing newline). Every rule points at PROXY_DNS_IP#PROXY_DNS_PORT.
+    """
+    rule_template = "server=/{host}/{dns_ip}#{dns_port}"
+    rules = []
+    for host in sorted(get_gfwlist_hosts()):
+        rules.append(rule_template.format(host=host, dns_ip=PROXY_DNS_IP, dns_port=PROXY_DNS_PORT))
+    return '\n'.join(rules)
+
+
+def main():
+    """Write the generated rules to DNSMASQ_RULES_FILE and restart dnsmasq.
+
+    A non-zero exit status from the restart command is logged as an error;
+    it is not raised, so the behaviour for callers is unchanged.
+    """
+    dnsmasq_text = get_dnsmasq_text()
+    with open(DNSMASQ_RULES_FILE, 'w') as f:
+        f.write(dnsmasq_text)
+    result = subprocess.run(["/etc/init.d/dnsmasq", "restart"])
+    if result.returncode != 0:
+        # Without this the new rules file could sit unused with no hint why.
+        logging.error('dnsmasq restart exited with status %d', result.returncode)
+
+
+if __name__ == '__main__':
+    # Send log records to stderr at INFO level, formatted as "LEVEL:message";
+    # the DEBUG-level "invalid rule" messages are therefore not shown.
+    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="%(levelname)s:%(message)s")
+
+    main()