Skip to content
Snippets Groups Projects
Commit ecdf8d88 authored by Raphael Bialon's avatar Raphael Bialon
Browse files

Add first version of google fonts check

parents
No related branches found
No related tags found
No related merge requests found
Pipfile 0 → 100644
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
requests = "*"
beautifulsoup4 = "*"
[dev-packages]
[requires]
python_version = "3.10"
{
"_meta": {
"hash": {
"sha256": "73352366ebbba8a1b3952ae48124bd44d7414d5f62dd76c10b00d5772e972461"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.10"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"beautifulsoup4": {
"hashes": [
"sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30",
"sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693"
],
"index": "pypi",
"version": "==4.11.1"
},
"certifi": {
"hashes": [
"sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14",
"sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382"
],
"markers": "python_version >= '3.6'",
"version": "==2022.9.24"
},
"charset-normalizer": {
"hashes": [
"sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845",
"sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"
],
"markers": "python_full_version >= '3.6.0'",
"version": "==2.1.1"
},
"idna": {
"hashes": [
"sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4",
"sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"
],
"markers": "python_version >= '3.5'",
"version": "==3.4"
},
"requests": {
"hashes": [
"sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983",
"sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"
],
"index": "pypi",
"version": "==2.28.1"
},
"soupsieve": {
"hashes": [
"sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759",
"sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"
],
"markers": "python_version >= '3.6'",
"version": "==2.3.2.post1"
},
"urllib3": {
"hashes": [
"sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e",
"sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'",
"version": "==1.26.12"
}
},
"develop": {}
}
# Google Fonts Check
This tool searches for references to Google Fonts dependencies included by
```html
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=[…]">
```
for all entries in domains.txt.
This project uses Pipenv to set up the virtual environment for Python 3.
Run the check with:
```bash
pipenv run python check.py
```
check.py 0 → 100644
import concurrent.futures
import re
from threading import Lock
import requests as r
from bs4 import BeautifulSoup
# Text file listing the domains to check, separated by whitespace.
DOMAINS_FILE = "domains.txt"

# Matches stylesheet URLs of the Google Fonts CSS API (both the "css" and
# "css2" endpoints). The scheme is optional so that protocol-relative
# ("//fonts.googleapis.com/...") and plain "http://" references are detected
# as well, and "family" may appear after other query parameters
# (e.g. "?display=swap&family=Roboto"). Intended for use with .match(), which
# anchors at the start of the href.
google_fonts_matcher = re.compile(
    r"(?:https?:)?//fonts\.googleapis\.com/css2?\?(?:.*&)?family="
)

# Guards the progress counters below, which are updated from done-callbacks
# that run on worker threads.
lock = Lock()
tasks_total = 0
tasks_completed = 0
def main() -> None:
    """Check every domain listed in ``DOMAINS_FILE`` and print a report.

    The domains are read as whitespace-separated entries and checked
    concurrently on a thread pool. While the checks run, a progress line is
    updated in place. Afterwards the sites that do not use Google Fonts (or
    could not be checked) are listed first, followed by the sites that do;
    each group is sorted by URL so the output is reproducible.
    """
    global tasks_total

    with open(DOMAINS_FILE, encoding="utf-8") as f:
        domains = f.read().split()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(check_google_fonts_usage, domain) for domain in domains]
        # Publish the total before registering the callbacks so the progress
        # line never reports "n/0".
        tasks_total = len(futures)
        for future in futures:
            future.add_done_callback(progress_indicator)
        results = [future.result() for future in concurrent.futures.as_completed(futures)]

    # The progress line ends with "\r"; finish it with a newline so the first
    # report line does not overwrite it and leave stray characters behind.
    if results:
        print()

    # Sorting by URL: the completion order of the futures is nondeterministic.
    without_fonts = sorted((res for res in results if not res[1]), key=lambda res: res[0])
    with_fonts = sorted((res for res in results if res[1]), key=lambda res: res[0])

    for url, _, duration in without_fonts:
        # A negative duration is the marker for "no usable response".
        if duration < 0:
            print(f"{url}: Connection Error")
        else:
            print(f"{url} does not use Google Fonts (took {duration}s)")
    for url, _, duration in with_fonts:
        print(f"{url} uses Google Fonts (took {duration}s)")
def progress_indicator(future: concurrent.futures.Future):
    """Done-callback that bumps the completed counter and redraws progress.

    The ``future`` argument is required by the callback signature but is not
    inspected. The counter update and the print happen under ``lock`` because
    callbacks may fire from several worker threads.
    """
    # Only the counter is rebound here; ``lock`` and ``tasks_total`` are read.
    global tasks_completed
    with lock:
        tasks_completed += 1
        status = f"{tasks_completed}/{tasks_total} completed, {tasks_total - tasks_completed} remain."
        # "\r" returns the cursor to the line start so the next update
        # overwrites this one.
        print(status, end="\r")
def check_google_fonts_usage(domain: str, timeout: float = 10.0) -> tuple[str, bool, float]:
    """Fetch the start page of *domain* and report whether it uses Google Fonts.

    HTTPS is tried first; if that request fails, plain HTTP is tried once.

    Args:
        domain: Host name without scheme, e.g. ``"example.org"``.
        timeout: Seconds to wait for each request before giving up. Without
            a timeout a single unresponsive host would block its worker
            thread indefinitely.

    Returns:
        A ``(url, uses_fonts, duration)`` tuple. ``url`` is the URL that was
        requested last, ``uses_fonts`` tells whether a Google Fonts
        stylesheet link was found, and ``duration`` is the response time in
        seconds. ``duration`` is ``-1.0`` when no request succeeded or the
        response status was not 200 OK.
    """
    url = f"https://{domain}/"
    try:
        response = r.get(url, timeout=timeout)
    # RequestException is the base class of all requests errors (connection
    # failures, timeouts, redirect loops, invalid URLs); catching it keeps
    # one bad domain from aborting the whole run.
    except r.exceptions.RequestException:
        url = f"http://{domain}/"
        try:
            response = r.get(url, timeout=timeout)
        except r.exceptions.RequestException:
            return url, False, -1.0

    if response.status_code != r.codes.ok:
        return url, False, -1.0

    soup = BeautifulSoup(response.text, "html.parser")
    return url, uses_google_fonts(soup), response.elapsed.total_seconds()
def uses_google_fonts(soup: BeautifulSoup) -> bool:
    """Return whether the parsed page links a Google Fonts stylesheet.

    Only ``<link rel="stylesheet" href="...">`` elements are inspected; the
    page counts as using Google Fonts as soon as one ``href`` matches
    ``google_fonts_matcher``.
    """
    # href=True restricts the search to links that actually carry an href,
    # so the subscript below cannot fail.
    return any(
        google_fonts_matcher.match(link["href"])
        for link in soup("link", rel="stylesheet", href=True)
    )
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment