Result
1 # vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
2
3 # Copyright 2014-2021 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
4 #
5 # This file is part of qutebrowser.
6 #
7 # qutebrowser is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # qutebrowser is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with qutebrowser. If not, see <https://www.gnu.org/licenses/>.
19
20 """Functions related to host blocking."""
21
22 import os.path
23 import posixpath
24 import zipfile
25 import logging
26 import pathlib
27 from typing import cast, IO, Set
28
29 from PyQt5.QtCore import QUrl
30
31 from qutebrowser.api import (
32 hook,
33 config,
34 message,
35 interceptor,
36 apitypes,
37 qtutils,
38 )
39 from qutebrowser.components.utils import blockutils
40 from qutebrowser.utils import version # FIXME: Move needed parts into api namespace?
41
42
# Logger used throughout this module; obtained under the name "network".
logger = logging.getLogger("network")
# Module-level HostBlocker instance. It starts out as None (the cast only
# satisfies type checkers) and is assigned the real object by init().
host_blocker = cast("HostBlocker", None)
45
46
47 def _guess_zip_filename(zf: zipfile.ZipFile) -> str:
48 """Guess which file to use inside a zip file."""
49 files = zf.namelist()
50 if len(files) == 1:
51 return files[0]
52 else:
53 for e in files:
54 if posixpath.splitext(e)[0].lower() == "hosts":
55 return e
56 raise FileNotFoundError("No hosts file found in zip")
57
58
def get_fileobj(byte_io: IO[bytes]) -> IO[bytes]:
    """Return a readable file object for the hosts data in a download.

    Plain downloads are handed back as-is, rewound to the start. If the
    download is a zip archive, the member chosen by _guess_zip_filename is
    opened for reading and returned instead.
    """
    byte_io.seek(0)  # rewind downloaded file
    is_archive = zipfile.is_zipfile(byte_io)
    byte_io.seek(0)  # rewind what zipfile.is_zipfile did

    if not is_archive:
        return byte_io

    archive = zipfile.ZipFile(byte_io)
    member = _guess_zip_filename(archive)
    return archive.open(member, mode="r")
70
71
def _should_be_used() -> bool:
    """Decide whether host blocking is active for the configured method.

    Host blocking is used when content.blocking.method is "both" or
    "hosts", and also for "auto" when the adblock library is not usable.
    """
    method = config.val.content.blocking.method
    adblock_usable = version.MODULE_INFO["adblock"].is_usable()

    logger.debug(f"Configured adblock method {method}, adblock library usable: "
                 f"{adblock_usable}")

    if method in ("both", "hosts"):
        return True
    return method == "auto" and not adblock_usable
82
83
class HostBlocker:

    """Manage blocked hosts based on /etc/hosts-like files.

    Attributes:
        enabled: Given the current blocking method, should the host blocker be enabled?
        _blocked_hosts: A set of blocked hosts.
        _config_blocked_hosts: A set of blocked hosts from ~/.config.
        _local_hosts_file: The path to the blocked-hosts file.
        _config_hosts_file: The path to a blocked-hosts in ~/.config
        _has_basedir: Whether a custom --basedir is set.
    """

    def __init__(
        self,
        *,
        data_dir: pathlib.Path,
        config_dir: pathlib.Path,
        has_basedir: bool = False
    ) -> None:
        """Set up empty host sets and the paths of both blocked-hosts files.

        Args:
            data_dir: Directory holding the downloaded "blocked-hosts" file.
            config_dir: Directory holding the user's "blocked-hosts" file.
            has_basedir: Whether a custom --basedir is set.
        """
        self.enabled = _should_be_used()
        self._has_basedir = has_basedir
        self._blocked_hosts: Set[str] = set()
        self._config_blocked_hosts: Set[str] = set()

        self._local_hosts_file = str(data_dir / "blocked-hosts")
        # Only needs _local_hosts_file: drops a stale downloaded list if no
        # lists are configured anymore.
        self.update_files()

        self._config_hosts_file = str(config_dir / "blocked-hosts")

    def _is_blocked(self, request_url: QUrl, first_party_url: QUrl = None) -> bool:
        """Check whether the given request is blocked.

        Args:
            request_url: The URL being requested; must be valid.
            first_party_url: The URL of the page issuing the request, used
                             for the per-URL "content.blocking.enabled"
                             lookup. None or an invalid URL means no URL is
                             passed to that lookup.

        Return:
            True if the host is in one of the block sets and the URL is not
            whitelisted, False otherwise (including when blocking is off).
        """
        if not self.enabled:
            return False

        if first_party_url is not None and not first_party_url.isValid():
            first_party_url = None

        qtutils.ensure_valid(request_url)

        if not config.get("content.blocking.enabled", url=first_party_url):
            return False

        host = request_url.host()
        return (
            host in self._blocked_hosts or host in self._config_blocked_hosts
        ) and not blockutils.is_whitelisted_url(request_url)

    def filter_request(self, info: interceptor.Request) -> None:
        """Block the given request if necessary."""
        if self._is_blocked(
            request_url=info.request_url, first_party_url=info.first_party_url
        ):
            logger.debug(
                "Request to {} blocked by host blocker.".format(info.request_url.host())
            )
            info.block()

    def _read_hosts_line(self, raw_line: bytes) -> Set[str]:
        """Read hosts from the given line.

        Both the "one host per line" format and the /etc/hosts format
        (address followed by one or more host names) are understood.

        Args:
            raw_line: The bytes object to read.

        Returns:
            A set containing valid hosts found in the line. Entries without
            a dot, entries ending in ".localdomain" and "0.0.0.0" are
            dropped.

        Raises:
            UnicodeDecodeError: If the part before any comment is not valid
                                UTF-8.
        """
        # Cut the comment off *before* decoding, so that encoding errors
        # inside comments (full-line, indented or trailing) cannot make us
        # lose the hosts on the line. "#" is a single ASCII byte in UTF-8, so
        # for valid input this equals stripping the comment after decoding.
        content = raw_line.partition(b"#")[0]
        parts = content.decode("utf-8").split()

        if len(parts) == 1:
            # "one host per line" format
            hosts = parts
        else:
            # /etc/hosts format: the first field is the address
            hosts = parts[1:]

        return {
            host
            for host in hosts
            if "." in host and not host.endswith(".localdomain") and host != "0.0.0.0"
        }

    def _read_hosts_file(self, filename: str, target: Set[str]) -> bool:
        """Read hosts from the given filename.

        Args:
            filename: The file to read.
            target: The set to store the hosts in.

        Return:
            True if a read was attempted, False otherwise
        """
        if not os.path.exists(filename):
            return False

        try:
            with open(filename, "rb") as f:
                for line in f:
                    # In-place union: mutates the caller's set.
                    target |= self._read_hosts_line(line)

        except (OSError, UnicodeDecodeError):
            # Still counts as "attempted": the file exists but is unusable.
            logger.exception("Failed to read host blocklist!")

        return True

    def read_hosts(self) -> None:
        """Read hosts from the existing blocked-hosts file."""
        self._blocked_hosts = set()

        self._read_hosts_file(self._config_hosts_file, self._config_blocked_hosts)

        found = self._read_hosts_file(self._local_hosts_file, self._blocked_hosts)

        if not found:
            # No downloaded list on disk yet: hint at the update command, but
            # only if lists are configured and blocking is actually in use.
            if (
                config.val.content.blocking.hosts.lists
                and not self._has_basedir
                and config.val.content.blocking.enabled
                and self.enabled
            ):
                message.info("Run :adblock-update to get adblock lists.")

    def adblock_update(self) -> blockutils.BlocklistDownloads:
        """Update the adblock block lists.

        Return:
            The BlocklistDownloads object driving the downloads.
        """
        # NOTE(review): hosts are only ever added to _config_blocked_hosts
        # here; entries removed from the config file are not dropped.
        self._read_hosts_file(self._config_hosts_file, self._config_blocked_hosts)
        self._blocked_hosts = set()

        blocklists = config.val.content.blocking.hosts.lists
        dl = blockutils.BlocklistDownloads(blocklists)
        dl.single_download_finished.connect(self._merge_file)
        dl.all_downloads_finished.connect(self._on_lists_downloaded)
        dl.initiate()
        return dl

    def _merge_file(self, byte_io: IO[bytes]) -> None:
        """Read and merge host files.

        Args:
            byte_io: The BytesIO object of the completed download.
        """
        error_count = 0
        line_count = 0
        try:
            f = get_fileobj(byte_io)
        except (OSError, zipfile.BadZipFile, zipfile.LargeZipFile, LookupError) as e:
            message.error(
                "hostblock: Error while reading {}: {} - {}".format(
                    byte_io.name, e.__class__.__name__, e
                )
            )
            return

        try:
            for line in f:
                line_count += 1
                try:
                    self._blocked_hosts |= self._read_hosts_line(line)
                except UnicodeDecodeError:
                    logger.error("Failed to decode: {!r}".format(line))
                    error_count += 1
        finally:
            # get_fileobj may have opened a zip member; that stream is ours
            # to close. The download object itself is left to the caller.
            if f is not byte_io:
                f.close()

        logger.debug("{}: read {} lines".format(byte_io.name, line_count))
        if error_count > 0:
            message.error(
                "hostblock: {} read errors for {}".format(error_count, byte_io.name)
            )

    def _on_lists_downloaded(self, done_count: int) -> None:
        """Install block lists after files have been downloaded.

        Args:
            done_count: The number of sources, as reported in the info
                        message.
        """
        try:
            with open(self._local_hosts_file, "w", encoding="utf-8") as f:
                for host in sorted(self._blocked_hosts):
                    f.write(host + "\n")
                message.info(
                    "hostblock: Read {} hosts from {} sources.".format(
                        len(self._blocked_hosts), done_count
                    )
                )
        except OSError:
            logger.exception("Failed to write host block list!")

    def update_files(self) -> None:
        """Update files when the config changed.

        If no host lists are configured, the downloaded blocked-hosts file
        is deleted; a file that is already absent is not an error.
        """
        if not config.val.content.blocking.hosts.lists:
            try:
                os.remove(self._local_hosts_file)
            except FileNotFoundError:
                pass
            except OSError as e:
                logger.exception("Failed to delete hosts file: {}".format(e))
285
286
@hook.config_changed("content.blocking.hosts.lists")
def on_lists_changed() -> None:
    """Let the host blocker update its files after the host lists changed."""
    host_blocker.update_files()
290
291
@hook.config_changed("content.blocking.method")
def on_method_changed() -> None:
    """Re-evaluate whether the host blocker is enabled for the new method."""
    host_blocker.enabled = _should_be_used()
295
296
@hook.init()
def init(context: apitypes.InitContext) -> None:
    """Create the global host blocker, load its hosts and hook it up.

    The instance is stored in the module-level host_blocker, its hosts are
    read from disk, and its filter_request method is registered as a
    request interceptor.
    """
    global host_blocker
    blocker = HostBlocker(
        data_dir=context.data_dir,
        config_dir=context.config_dir,
        has_basedir=context.args.basedir is not None,
    )
    host_blocker = blocker
    blocker.read_hosts()
    interceptor.register(blocker.filter_request)
307 interceptor.register(host_blocker.filter_request)
308