Untitled
unknown
plain_text
5 months ago
9.3 kB
34
No Index
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
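boss_pedals.py: Scrape BOSS pedal images, extract the hue of each image's average
colour and a per-image "value" metric (currently mean saturation; a brightness
estimator is also provided), then generate a grid image sorted by hue (columns)
and by that value (rows).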
"""
import argparse
import os
import re
import math
import logging
import sys
from pathlib import Path
from io import BytesIO
from typing import List, Optional
import concurrent.futures
import colorsys
import requests
from bs4 import BeautifulSoup
from PIL import Image, ImageDraw, ImageFont
# Logging setup: INFO and above, each record printed as "LEVEL | message".
logging.basicConfig(level=logging.INFO, format="%(levelname)s | %(message)s")
log = logging.getLogger(__name__)
# Constants
# Page scraped when --url is not given on the command line.
DEFAULT_URL = "http://pepemusic.de/boss_collection_website/d/data.html"
# User-Agent header installed on the session returned by build_session().
USER_AGENT = "Mozilla/5.0 (compatible; BossPedalScraper/1.0)"
# Passed as `timeout=` to every session.get() call (seconds, per requests).
TIMEOUT = 10
# Rainbow hue segmentation: (label, lower bound, upper bound), in degrees of hue.
# hue_to_segment() treats each range as lower-inclusive / upper-exclusive.
# Red appears twice ("red" and "red2") because it straddles the 0/360 wrap-around.
RAINBOW_SEGMENTS = [
    ("red", 0, 15),
    ("orange", 15, 45),
    ("yellow", 45, 75),
    ("green", 75, 165),
    ("cyan", 165, 195),
    ("blue", 195, 255),
    ("violet", 255, 290),
    ("magenta", 290, 330),
    ("red2", 330, 360),
]
class Pedal:
    """One pedal image found on the scraped page, plus its colour metrics.

    ``name`` and ``url`` are set at construction; the remaining attributes
    start as ``None`` and are filled in once the image has been downloaded
    and analysed.
    """

    __slots__ = ("name", "url", "local_path", "hue", "segment_index", "value")

    def __init__(self, name: str, url: str):
        """Store the display *name* and absolute image *url*."""
        self.name = name
        self.url = url
        # Path of the saved local copy; None until the download succeeds.
        self.local_path: Optional[Path] = None
        # Hue in degrees; None until the image has been analysed.
        self.hue: Optional[float] = None
        # Index of the rainbow segment the hue falls into.
        self.segment_index: Optional[int] = None
        # Secondary sort metric used for vertical ordering in the grid.
        self.value: Optional[float] = None

    @staticmethod
    def _strip_unsafe(text: str) -> str:
        """Drop every character except ASCII letters, digits, '_', '-' and space."""
        return re.sub(r"[^A-Za-z0-9_\- ]+", "", text).strip()

    @property
    def sanitized_name(self) -> str:
        """Return a filesystem-friendly version of ``name``; never empty.

        A name made only of stripped characters (blank, punctuation,
        non-ASCII) used to sanitise to ``""``, which produced a hidden
        ``.png`` file shared by every such pedal. In that case fall back to
        the file stem of the URL, and finally to the literal ``"pedal"``.
        """
        cleaned = self._strip_unsafe(self.name)
        if cleaned:
            return cleaned
        # Ignore any query string / fragment before taking the file stem.
        url_path = re.split(r"[?#]", self.url, maxsplit=1)[0]
        return self._strip_unsafe(Path(url_path).stem) or "pedal"
def build_session() -> requests.Session:
    """Create a ``requests.Session`` that identifies itself with USER_AGENT."""
    http = requests.Session()
    http.headers["User-Agent"] = USER_AGENT
    return http
def fetch_pedals(url: str, session: requests.Session) -> List[Pedal]:
    """Download the page at *url* and return one ``Pedal`` per ``<img src>``.

    Args:
        url: Page to scrape; also the base for resolving relative image URLs.
        session: Session used for the HTTP GET.

    Returns:
        Pedals in document order, each holding a display name and an
        absolute image URL.

    Raises:
        requests.HTTPError: If the page answers with an error status
            (via ``raise_for_status``).
    """
    log.info("Scraping %s", url)
    resp = session.get(url, timeout=TIMEOUT)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.content, "html.parser")
    pedals = []
    for img in soup.select("img[src]"):
        src = img["src"].strip()
        if not src:
            # A blank src would resolve to the page itself, not an image.
            continue
        # `alt` may be absent, empty or whitespace-only; strip before testing
        # so all three cases fall back to the image file's stem.
        name = (img.get("alt") or "").strip() or Path(src).stem
        full = requests.compat.urljoin(url, src)
        pedals.append(Pedal(name.strip(), full))
    log.info("Found %d pedal images.", len(pedals))
    return pedals
def rgb_to_hue(r: float, g: float, b: float) -> float:
    """Return the hue, in degrees, of a colour given as 0-255 RGB channels.

    A colour whose three channels are equal (no chroma) yields ``0.0``.
    """
    red, green, blue = r / 255.0, g / 255.0, b / 255.0
    strongest = max(red, green, blue)
    chroma = strongest - min(red, green, blue)
    if chroma == 0:
        return 0.0
    # Position on the six-sector colour wheel; the sector depends on which
    # channel dominates (red is checked first, so it wins ties).
    if strongest == red:
        sector = ((green - blue) / chroma) % 6
    elif strongest == green:
        sector = (blue - red) / chroma + 2
    else:
        sector = (red - green) / chroma + 4
    return sector * 60
def compute_hue(img: Image.Image, thumb: int = 50) -> float:
    """Return the hue, in degrees, of the image's mean RGB colour.

    The image is first shrunk to fit within ``thumb`` x ``thumb`` pixels so
    that averaging stays cheap.
    """
    # thumbnail() resizes in place, so work on a copy of the caller's image.
    small = img.copy()
    small.thumbnail((thumb, thumb), Image.LANCZOS)
    pixels = list(small.getdata())
    count = len(pixels)
    red_total = green_total = blue_total = 0
    for px in pixels:
        red_total += px[0]
        green_total += px[1]
        blue_total += px[2]
    return rgb_to_hue(red_total / count, green_total / count, blue_total / count)
def compute_value(img: Image.Image, thumb: int = 40) -> float:
    """Estimate brightness as the lower-quartile luminance of coloured pixels.

    The image is resized to ``thumb`` x ``thumb``. Pixels whose channel
    spread (max - min, scaled to 0-1) is under 0.05 are treated as grey and
    ignored. Returns ``0.0`` when every pixel is grey.
    """
    shrunk = img.copy().resize((thumb, thumb), Image.LANCZOS)
    # ITU-R BT.709 luminance of every sufficiently colourful pixel, ascending.
    coloured = sorted(
        (0.2126 * red + 0.7152 * green + 0.0722 * blue) / 255.0
        for red, green, blue in shrunk.getdata()
        if (max(red, green, blue) - min(red, green, blue)) / 255.0 >= 0.05
    )
    if not coloured:
        return 0.0
    # 25th percentile: biases the estimate towards the darker pixels.
    return coloured[len(coloured) // 4]
def compute_saturation(img: Image.Image, thumb: int = 40) -> float:
    """Return the mean HSV saturation of the image, as a float in [0, 1].

    The image is resized to ``thumb`` x ``thumb``. Near-black and near-white
    pixels (HSV value outside the open interval 0.1-0.98) are ignored.
    Returns ``0.0`` when no pixel qualifies.
    """
    shrunk = img.copy().resize((thumb, thumb), Image.LANCZOS)
    hsv_pixels = (
        colorsys.rgb_to_hsv(red / 255.0, green / 255.0, blue / 255.0)
        for red, green, blue in shrunk.getdata()
    )
    kept = [sat for _hue, sat, val in hsv_pixels if 0.1 < val < 0.98]
    # Plain mean; a 75th-percentile statistic would be a more robust alternative.
    return sum(kept) / len(kept) if kept else 0.0
def hue_to_segment(hue: float) -> int:
    """Return the index of the RAINBOW_SEGMENTS entry whose range contains *hue*.

    Ranges are lower-inclusive and upper-exclusive. When no range matches,
    the result is ``len(RAINBOW_SEGMENTS)``, i.e. one past the last index.
    """
    matching = (
        index
        for index, (_label, low, high) in enumerate(RAINBOW_SEGMENTS)
        if low <= hue < high
    )
    return next(matching, len(RAINBOW_SEGMENTS))
def download_process(pedal: Pedal, out_dir: Path, session: requests.Session) -> Pedal:
    """Fetch one pedal image, save it as PNG in *out_dir* and record its metrics.

    Best effort: any failure is logged as a warning and the pedal is returned
    with whatever attributes had been set up to that point. ``local_path`` is
    assigned right after the file is written, before the colour analysis.
    """
    try:
        response = session.get(pedal.url, timeout=TIMEOUT)
        response.raise_for_status()
        picture = Image.open(BytesIO(response.content)).convert("RGB")

        target = out_dir / f"{pedal.sanitized_name}.png"
        picture.save(target, "PNG")
        pedal.local_path = target

        hue = compute_hue(picture)
        pedal.hue = hue
        pedal.segment_index = hue_to_segment(hue)
        # Saturation is the metric in use; compute_value() is the
        # brightness-based alternative.
        pedal.value = compute_saturation(picture)
    except Exception as exc:
        log.warning("Failed %s: %s", pedal.url, exc)
    return pedal
def _arrange_grid(pedals: List[Pedal], cols: int) -> List[List[Optional[Pedal]]]:
    """Place *pedals* in a rows x cols grid: hue left-to-right, value top-to-bottom."""

    def hue_key(p: Pedal) -> float:
        # Compare against None explicitly: a hue of 0.0 is a real value and
        # must not be confused with "missing".
        return p.hue if p.hue is not None else 999

    def value_key(p: Pedal) -> float:
        return p.value if p.value is not None else 1.0

    ordered = sorted(pedals, key=hue_key)
    rows = math.ceil(len(ordered) / cols)
    grid: List[List[Optional[Pedal]]] = [[None] * cols for _ in range(rows)]
    for c in range(cols):
        # Each column is a run of `rows` consecutive pedals in hue order,
        # re-sorted by value (low to high) and filled from the top.
        column = sorted(ordered[c * rows:(c + 1) * rows], key=value_key)
        for r, pedal in enumerate(column):
            grid[r][c] = pedal
    return grid


def compose(
    pedals: List[Pedal],
    cols: int,
    thumb: int,
    spacing: int,
    font_path: Optional[str],
    font_size: int,
    output: Path,
) -> None:
    """Render the pedals as a labelled thumbnail grid and save it to *output*.

    Pedals without a ``local_path`` are ignored. The rest are sorted by hue
    and cut into ``cols`` columns of equal height; each column is then sorted
    by ``value``. Because columns are filled top-down from consecutive slices,
    empty cells can only appear at the bottom of a column and in the trailing
    columns, when the pedal count does not fill the grid.

    Args:
        pedals: Candidate pedals.
        cols: Number of grid columns; must be at least 1.
        thumb: Edge length, in pixels, of each square thumbnail.
        spacing: Gap, in pixels, around and between cells.
        font_path: TrueType font file for the labels, or a falsy value to use
            Pillow's built-in default font.
        font_size: Label size; only used when *font_path* is given.
        output: Destination image path.

    Raises:
        ValueError: If *cols* is less than 1 and there is something to draw.
    """
    # --- Clean-up & derived layout -----------------------------------------
    pedals = [p for p in pedals if p.local_path]
    if not pedals:
        log.error("No pedals to compose.")
        return
    if cols < 1:
        raise ValueError(f"cols must be at least 1, got {cols}")
    grid = _arrange_grid(pedals, cols)
    rows = len(grid)

    # --- Measurements & canvas ---------------------------------------------
    font = (
        ImageFont.truetype(font_path, font_size)
        if font_path
        else ImageFont.load_default()
    )
    # Bottom of the bounding box of a string with an ascender and a descender.
    text_h = font.getbbox("Hg")[3]
    cell_h = thumb + text_h + spacing
    cell_w = thumb
    W = cols * cell_w + (cols + 1) * spacing
    H = rows * cell_h + (rows + 1) * spacing
    canvas = Image.new("RGB", (W, H), "white")
    draw = ImageDraw.Draw(canvas)

    # --- Rendering -----------------------------------------------------------
    for r, row in enumerate(grid):
        y = spacing + r * (cell_h + spacing)
        for c, p in enumerate(row):
            if p is None:
                continue  # unfilled cell
            x = spacing + c * (cell_w + spacing)
            # Context manager releases the file handle as soon as the
            # thumbnail has been produced.
            with Image.open(p.local_path) as source:
                tile = source.convert("RGB").resize((thumb, thumb), Image.LANCZOS)
            canvas.paste(tile, (x, y))
            # Centre the label horizontally under its thumbnail.
            tw = draw.textlength(p.name, font=font)
            tx = x + (thumb - tw) // 2
            ty = y + thumb + (spacing // 2)
            draw.text((tx, ty), p.name, font=font, fill="black")
    canvas.save(output)
    log.info("✅ Composite saved → %s", output)
def main():
    """Command-line entry point: scrape, download/analyse in parallel, compose.

    Parses the CLI options, creates the image directory, collects the pedal
    list from the page, processes every image on a thread pool and finally
    renders the composite grid.
    """
    parser = argparse.ArgumentParser(description="Scrape and compose BOSS pedals.")
    parser.add_argument("--url", default=DEFAULT_URL)
    parser.add_argument("--out-dir", default="boss_images")
    parser.add_argument("--cols", type=int, default=12)
    parser.add_argument("--thumb", type=int, default=200)
    parser.add_argument("--spacing", type=int, default=10)
    parser.add_argument("--font", default=None)
    parser.add_argument("--font-size", type=int, default=16)
    parser.add_argument("--workers", type=int, default=8)
    parser.add_argument("--output", default="boss_pedals_sorted.png")
    args = parser.parse_args()

    # Reject values that would otherwise fail later with an obscure error
    # (division by zero in the layout, ValueError from the thread pool).
    if args.cols < 1:
        parser.error("--cols must be at least 1")
    if args.workers < 1:
        parser.error("--workers must be at least 1")

    out_dir = Path(args.out_dir)
    # parents=True so a nested --out-dir such as "a/b/c" is accepted.
    out_dir.mkdir(parents=True, exist_ok=True)

    # The session is a context manager; leaving the block closes its
    # pooled connections.
    with build_session() as session:
        pedals = fetch_pedals(args.url, session)
        with concurrent.futures.ThreadPoolExecutor(max_workers=args.workers) as ex:
            pedals = list(
                ex.map(lambda p: download_process(p, out_dir, session), pedals)
            )

    compose(
        pedals,
        cols=args.cols,
        thumb=args.thumb,
        spacing=args.spacing,
        font_path=args.font,
        font_size=args.font_size,
        output=Path(args.output),
    )
# Script entry point: run main() and turn Ctrl-C into a clean, non-zero exit.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        log.warning("Interrupted by user.")
        # 130 = 128 + SIGINT, the conventional status for an interrupted
        # process; exiting with 0 would look like a successful run.
        sys.exit(130)
Editor is loading...
Leave a Comment