This commit is contained in:
bgme 2023-01-19 17:14:23 +08:00
commit aea788adf3
2 changed files with 350 additions and 0 deletions

173
.gitignore vendored Normal file
View file

@ -0,0 +1,173 @@
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml
# End of https://www.toptal.com/developers/gitignore/api/python
/.idea

177
main.py Normal file
View file

@ -0,0 +1,177 @@
import base64
import logging
import re
import subprocess
import sys
from urllib.request import urlopen
# Address and port written into every generated "server=" rule.
# The port stays a string because it is only interpolated into rule text.
PROXY_DNS_IP = "127.0.0.1"
PROXY_DNS_PORT = "5353"

# File that receives the generated dnsmasq rules.
DNSMASQ_RULES_FILE = "/tmp/dnsmasq.d/gfwlist"

# Download locations for gfwlist.txt (project: https://github.com/gfwlist/gfwlist).
# NOTE(review): the last entry uses plain http — confirm whether https is available.
GFWLIST_URL_LIST = [
    "https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt",
    "https://pagure.io/gfwlist/raw/master/f/gfwlist.txt",
    "https://gitlab.com/gfwlist/gfwlist/raw/master/gfwlist.txt",
    "https://git.tuxfamily.org/gfwlist/gfwlist.git/plain/gfwlist.txt",
    "http://repo.or.cz/gfwlist.git/blob_plain/HEAD:/gfwlist.txt",
]
def get_gfwlist_text() -> str:
    """Download gfwlist from the first usable mirror and return its text.

    Every URL in ``GFWLIST_URL_LIST`` is tried in order. The response body is
    base64-decoded and then decoded as UTF-8; the first mirror for which all
    of that succeeds wins.

    Returns:
        The decoded gfwlist text.

    Raises:
        IOError: If none of the mirrors produced a decodable list.
    """
    for url in GFWLIST_URL_LIST:
        logging.info('request {url}'.format(url=url))
        try:
            with urlopen(url, timeout=15) as response:
                return base64.b64decode(response.read()).decode('utf-8')
        except Exception as err:
            # Falling through to the next mirror is intentional, but a bare
            # ``except`` would also trap KeyboardInterrupt/SystemExit and hide
            # the reason for the failure. Catch ordinary errors only and say
            # what went wrong.
            logging.warning('failed to fetch %s: %s', url, err)
    raise IOError("can't download gfwlist")
def is_comment(line: str) -> bool:
    """Report whether *line* starts with ``!`` or with ``[AutoProxy``."""
    # re.match anchors at the start of the string, so both alternatives are
    # effectively prefix tests.
    return re.match(r'^!|\[AutoProxy', line) is not None
def has_ip(line: str) -> bool:
    """Report whether *line* contains an IPv4 or IPv6 address anywhere.

    The patterns are not anchored, so an address embedded in a longer rule
    also counts.
    """
    # Pattern sources:
    # https://stackoverflow.com/questions/5284147/validating-ipv4-addresses-with-regexp
    # https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses
    ipv4_pattern = r'((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.?\b){4}'
    ipv6_pattern = r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))'
    # IPv4 is tested first; the IPv6 pattern only runs when IPv4 found nothing.
    for pattern in (ipv4_pattern, ipv6_pattern):
        if re.search(pattern, line) is not None:
            return True
    return False
def is_exception(line: str) -> bool:
    """Report whether *line* starts with the ``@@`` prefix."""
    return re.match(r'^@@', line) is not None
def is_regular(line: str) -> bool:
    """Report whether *line* starts with ``/``."""
    return re.match(r'^/', line) is not None
def gfwlist_line_filter(line: str) -> bool:
    """Decide whether a gfwlist line should be passed on for conversion.

    Returns ``True`` only for a non-blank line that is none of: a comment,
    a line containing an IP address, an ``@@`` line, or a ``/`` line.
    """
    stripped = line.strip()
    if stripped == '':
        return False
    # Checks run in the same order as before and stop at the first hit.
    rejected = (
        is_comment(stripped)
        or has_ip(stripped)
        or is_exception(stripped)
        or is_regular(stripped)
    )
    return not rejected
def gfwlist_line_converter(line: str) -> str:
    """Reduce one gfwlist rule to a bare domain name.

    Three rule shapes are handled:

    * ``||domain`` — the marker is removed.
    * ``|http://domain`` — the marker and scheme are removed; rules that
      still contain a ``/`` are rejected.
    * anything else — must contain a ``.`` and no ``/``, ``?``, ``=`` or
      percent-escape; a single leading ``.`` is removed.

    In every shape a leading wildcard label (``*.`` or ``cdn*.``) is dropped,
    and a ``*`` anywhere else rejects the rule.

    Args:
        line: A single gfwlist rule line.

    Returns:
        The domain name, or ``""`` when the rule cannot be expressed as a
        plain domain (the rejection is logged at DEBUG level).
    """
    raw_line = line
    line = line.strip()
    # Drop one trailing slash so "domain/" is not mistaken for a path rule.
    line = re.sub(r'/$', '', line)

    def invalid_rule_return() -> str:
        # Log the untouched input so the offending rule is easy to find.
        logging.debug('invalid rule: ' + raw_line)
        return ""

    def convert_asterisk(line: str) -> str:
        asterisk_re = re.compile(r'^[\w\-_]*\*[\w\-_]*\.')
        # Strip a leading wildcard label such as "*." or "cdn*.". The pattern
        # is anchored, so at most one replacement happens.
        line = asterisk_re.sub("", line)
        # Reject rules that still contain a wildcard somewhere else.
        if '*' in line:
            return invalid_rule_return()
        return line

    # ||global.bing.com
    # ||cdn*.i-scmp.com
    if line.startswith('||'):
        line = line.replace('||', "")
        return convert_asterisk(line)

    # |http://www.dmm.com/netgame
    # |http://bbs.cantonese.asia/
    # |http://*.1mobile.tw
    # |http://*2.bahamut.com.tw
    if line.startswith('|'):
        line = line.replace('|', '')
        line = re.sub(r'^http(s)?://', '', line)
        # Reject rules that contain a path.
        if '/' in line:
            return invalid_rule_return()
        return convert_asterisk(line)

    # .casinobellini.com
    # share.dmhy.org
    # .ddns.net/
    # bbs.sina.com%2F
    # .amazon.com/Dalai-Lama
    # amazon.com/Prisoner-State-Secret-Journal-Premier
    # .keepandshare.com/visit/visit_page.php?i=688154
    # .pentoy.hk/%E6%99%82%E4%BA%8B
    # .ruanyifeng.com/blog*some_ways_to_break_the_great_firewall
    # prisoner-state-secret-journal-premier
    # q%3Dfreedom
    # search*safeweb
    # q=triangle
    # ultrareach

    # Reject rules that are not domains (plain keywords have no dot).
    if '.' not in line:
        return invalid_rule_return()
    # Strip the http(s) scheme.
    line = re.sub(r'^http(s)?://', '', line)
    # Reject rules that contain a path or query parameters.
    if any(marker in line for marker in ('/', '?', '=')):
        return invalid_rule_return()
    # Reject rules containing percent-escapes. This must be a search:
    # re.match only looks at the start of the string, which let rules such
    # as "bbs.sina.com%2F" through and emitted them as host names.
    if re.search(r'%\w\w', line):
        return invalid_rule_return()
    line = convert_asterisk(line)
    # Strip the single leading dot of ".example.com"-style rules.
    if line.startswith('.'):
        line = line[1:]
    return line
def get_gfwlist_hosts() -> set[str]:
    """Download gfwlist and return the set of domains extracted from it.

    Each line is first screened with ``gfwlist_line_filter``; surviving lines
    are converted with ``gfwlist_line_converter`` and empty results are
    discarded.
    """
    hosts = set()
    for rule in get_gfwlist_text().splitlines():
        if not gfwlist_line_filter(rule):
            continue
        host = gfwlist_line_converter(rule)
        # The converter signals an unusable rule with an empty string.
        if host != "":
            hosts.add(host)
    return hosts
def get_dnsmasq_text() -> str:
    """Build the dnsmasq rule text for all gfwlist hosts.

    Produces one ``server=/<host>/<ip>#<port>`` line per host, sorted by host
    name and joined with newlines (no trailing newline).
    """
    template = "server=/{host}/{dns_ip}#{dns_port}"
    ordered_hosts = sorted(get_gfwlist_hosts())
    return '\n'.join(
        template.format(host=name, dns_ip=PROXY_DNS_IP, dns_port=PROXY_DNS_PORT)
        for name in ordered_hosts
    )
def main():
    """Write the generated rules to ``DNSMASQ_RULES_FILE`` and restart dnsmasq.

    The rule text is produced by ``get_dnsmasq_text``. A failing restart does
    not raise; its exit status is reported through the logging module.
    """
    dnsmasq_text = get_dnsmasq_text()
    # Pin the encoding so the output does not depend on the process locale.
    with open(DNSMASQ_RULES_FILE, 'w', encoding='utf-8') as f:
        f.write(dnsmasq_text)
    result = subprocess.run(["/etc/init.d/dnsmasq", "restart"])
    # Previously the exit status was dropped, so a failed restart went
    # unnoticed even though the new rules were not loaded.
    if result.returncode != 0:
        logging.error('dnsmasq restart exited with status %d', result.returncode)
if __name__ == '__main__':
    # Script entry point: send log records to stderr at INFO level, then run.
    logging.basicConfig(
        format="%(levelname)s:%(message)s",
        level=logging.INFO,
        stream=sys.stderr,
    )
    main()