Source code for myst_sphinx_gallery.images

"""A module to manage images in a MyST markdown/notebook/rst file."""

from __future__ import annotations

import base64
import io
import re
from pathlib import Path
from typing import Literal

import nbformat
from PIL import Image, ImageOps
from sphinx.util import logging

from .utils import ensure_dir_exists, print_run_time

# Maps every supported ``operation`` name except "thumbnail" to the Pillow
# ``ImageOps`` function that implements it. "thumbnail" is absent because
# ``Thumbnail.generate_thumbnail`` handles it with ``Image.thumbnail`` instead.
OperationMap = {
    "contain": ImageOps.contain,
    "cover": ImageOps.cover,
    "fit": ImageOps.fit,
    "pad": ImageOps.pad,
}
# Baseline keyword arguments for saving a thumbnail as WebP. ``Thumbnail``
# copies this dict and layers quality/animation options and user overrides
# on top of it, so the module-level dict itself is never mutated.
# NOTE(review): Pillow's WebP writer documents ``method`` (0-6) for the
# speed/size trade-off; confirm that the ``compression`` key has any effect.
SaveKwargs = {
    "format": "WebP",
    "lossless": False,
    "compression": 6,
}

logger = logging.getLogger(__name__)


class Thumbnail:
    """Generate and save a WebP thumbnail of a static or animated image.

    The source image is resized to ``ref_size`` with one of the supported
    operations and written to ``output_dir`` (or an explicit path). Animated
    images are sampled down to at most ``max_animation_frames`` frames.
    """

    _path: Path
    _image: Image.Image

    def __init__(
        self,
        image: Path | str | Image.Image,
        output_dir: Path | str,
        ref_size: tuple[int, int] | int = (320, 224),
        operation: Literal["thumbnail", "contain", "cover", "fit", "pad"] = "pad",
        max_animation_frames: int = 50,
        quality_static: int = 80,
        quality_animated: int = 15,
        operation_kwargs: dict[str, int] | None = None,
        save_kwargs: dict[str, int] | None = None,
    ) -> None:
        """Initialize the Thumbnail object.

        Parameters
        ----------
        image : Path | str | Image.Image
            The path to the source image, or an already opened PIL image.
        output_dir : Path | str
            The directory to save the thumbnail image.
        ref_size : tuple[int, int] | int
            The reference size of the thumbnail image for output. A single
            integer is expanded to a square ``(size, size)``.
        operation : str
            The operation to perform on the image. See the Pillow
            documentation for more information:
            `<https://pillow.readthedocs.io/en/stable/handbook/tutorial.html#relative-resizing>`_
        max_animation_frames : int
            The maximum number of frames to extract from an animated image.
            If the image has more frames, the frames are sampled uniformly.
        quality_static : int
            The quality of the static image thumbnail.
        quality_animated : int
            The quality of the animated image thumbnail.
        operation_kwargs : dict, optional
            Extra keyword arguments forwarded to the resize operation.
        save_kwargs : dict, optional
            Extra keyword arguments for the save method; they override the
            defaults derived from ``SaveKwargs``.

        Raises
        ------
        TypeError
            If ``image`` is neither a path nor a PIL image, or if
            ``save_kwargs`` is not a dictionary.
        ValueError
            If ``ref_size`` cannot be interpreted as a pair of numbers.
        """
        if operation_kwargs is None:
            operation_kwargs = {}
        if save_kwargs is None:
            save_kwargs = {}

        if isinstance(image, Image.Image):
            self._image = image
            # NOTE(review): images opened by Pillow expose ``filename`` rather
            # than ``path``; confirm which attribute callers are expected to set.
            if hasattr(image, "path"):
                self._path = Path(image.path)
            else:
                self._path = Path("no_image.png")
        elif isinstance(image, (str, Path)):
            self._path = Path(image)
            self._image = Image.open(image)
        else:
            msg = "image must be a path or PIL Image object"
            raise TypeError(msg)

        self.operation = operation
        self.operation_kwargs = operation_kwargs
        self._output_dir = Path(output_dir)
        self.max_animation_frames = max_animation_frames
        self.quality_static = quality_static
        self.quality_animated = quality_animated

        self._ref_size = self._format_size(ref_size)
        # Must run last: it reads the image, the operation and the qualities.
        self._save_kwargs = self._format_save_kwargs(save_kwargs)

    def __str__(self) -> str:
        """Return the string representation of the object."""
        return f"Thumbnail(path={self.path})"

    def __repr__(self) -> str:
        """Return the string representation of the object."""
        return f"Thumbnail(path={self.path})"

    @property
    def _n_frames(self) -> int:
        """Number of frames in the source image (1 for static images).

        ``n_frames`` only exists on image classes with multi-frame support,
        so a missing attribute is treated as a single-frame image.
        """
        return getattr(self.image, "n_frames", 1)

    def _format_save_kwargs(self, save_kwargs: dict) -> dict:
        """Build the keyword arguments passed to ``Image.save``.

        Starts from a copy of ``SaveKwargs``, adds the quality (plus the
        animation options for multi-frame images) and finally applies the
        user supplied overrides.

        Raises
        ------
        TypeError
            If ``save_kwargs`` is not a dictionary.
        """
        if not isinstance(save_kwargs, dict):
            msg = "save_kwargs must be a dictionary"
            raise TypeError(msg)

        kwargs = SaveKwargs.copy()
        if self._n_frames > 1:
            kwargs.update(
                {
                    "quality": self.quality_animated,
                    "save_all": True,
                    "loop": 0,
                }
            )
        else:
            kwargs.update({"quality": self.quality_static})

        # NOTE(review): this key ends up in the *save* options, not in
        # ``operation_kwargs``; confirm whether it was meant to be the
        # padding colour handed to ``ImageOps.pad``.
        if self.operation == "pad":
            kwargs.update({"color": "white"})

        kwargs.update(save_kwargs)
        return kwargs

    def _parse_frames(self) -> tuple[list[int], int | None]:
        """Select the frame indices and frame duration for the animation.

        Returns
        -------
        frames : list[int]
            Indices of the frames to keep, at most ``max_animation_frames``.
        duration : int | None
            Duration of each kept frame, scaled by the sampling interval so
            the overall playback time is roughly preserved. ``None`` when the
            image carries no ``duration`` metadata.
        """
        n_frames = self._n_frames
        # Guard against zero/negative limits, which would otherwise divide by
        # zero or produce an empty frame list.
        max_frames = max(1, self.max_animation_frames)

        if n_frames > max_frames:
            interval = n_frames // max_frames
            frames = list(range(0, n_frames, interval))[:max_frames]
        else:
            interval = 1
            frames = list(range(n_frames))

        duration = self.image.info.get("duration")
        if duration is not None:
            duration = duration * interval
        return frames, duration

    def _format_size(self, size: tuple[int, int] | int) -> tuple[int, int]:
        """Format the size of the thumbnail image to a tuple of length 2.

        Raises
        ------
        ValueError
            If ``size`` is neither an integer nor an iterable of length 2.
        """
        if isinstance(size, int):
            return size, size

        msg = "size must be a tuple of length 2"
        try:
            size = tuple(size)
        except TypeError as e:  # not iterable
            raise ValueError(msg) from e

        if len(size) != 2:
            raise ValueError(msg)
        return size

    @property
    def path(self) -> Path:
        """The path to the source image the thumbnail is generated from."""
        return self._path

    @property
    def output_dir(self) -> Path:
        """The directory to save the thumbnail image."""
        return self._output_dir

    @property
    def auto_output_path(self) -> Path:
        """Automatically generated output path for the thumbnail image.

        The source file name is kept and its last suffix is replaced by
        ``.thumbnail.webp``.
        """
        out_file = self.output_dir / self.path.name
        return out_file.with_suffix(".thumbnail.webp")

    @property
    def image(self) -> Image.Image:
        """The source PIL image."""
        return self._image

    @property
    def ref_size(self) -> tuple[int, int]:
        """The reference size of the thumbnail image."""
        return self._ref_size

    @property
    def save_kwargs(self) -> dict[str, int]:
        """The keyword arguments for the save method."""
        return self._save_kwargs

    def generate_thumbnail(self) -> Image.Image:
        """Generate the thumbnail of the current frame based on the operation.

        Returns
        -------
        thumbnail : Image.Image
            The resized image. The source image itself is left untouched.

        Raises
        ------
        KeyError
            If ``operation`` is not "thumbnail" or a key of ``OperationMap``.
        """
        if self.operation == "thumbnail":
            # ``Image.thumbnail`` works in place, so operate on a copy.
            thumbnail = self.image.copy()
            thumbnail.thumbnail(self.ref_size)
            thumbnail.info.clear()
            return thumbnail

        operate = OperationMap[self.operation]
        image = self.image
        if self.operation == "pad":
            image = image.convert("RGBA")
        return operate(image, self.ref_size, **self.operation_kwargs)

    @print_run_time
    def save_thumbnail(self, out_path: Path | None = None) -> Path:
        """Save the thumbnail image to the output directory.

        Parameters
        ----------
        out_path : Path
            The path to save the thumbnail image. If None, the image will be
            saved with the same name as the original image.

        Returns
        -------
        out_path : Path
            The path to the saved thumbnail image.
        """
        out_path = self.auto_output_path if out_path is None else Path(out_path)

        if out_path.exists():
            msg = f" Thumbnail {out_path} already exists. skipping..."
            logger.info(msg)
            return out_path

        ensure_dir_exists(out_path.parent)
        msg = f" Saving thumbnail to {out_path}"
        logger.info(msg)

        if self._n_frames > 1:
            frames_idx, duration = self._parse_frames()
            # Only pass a duration when the source actually provides one.
            if duration is not None:
                self.save_kwargs.update({"duration": duration})

            # extract frames
            frames = []
            for idx in frames_idx:
                self.image.seek(idx)
                frames.append(self.generate_thumbnail())

            frames[0].save(
                out_path,
                append_images=frames[1:],
                **self.save_kwargs,
            )
        else:
            thumbnail = self.generate_thumbnail()
            thumbnail.save(out_path, **self.save_kwargs)

        return out_path
class DocImages:
    """A class to manage images in a MyST markdown/notebook/rst file.

    Holds an ordered collection of ``(url, alt)`` pairs and offers lookups
    of image urls by their alt text.
    """

    _urls: list[str]
    _alts: list[str]
    _images: list[tuple[str, str]]

    def __init__(self, images: list[tuple[str, str]]) -> None:
        """Initialize the DocImages object.

        Parameters
        ----------
        images : list[tuple[str, str]]
            A list of tuples, where each tuple contains the image url and
            alt text.
        """
        self._images = images
        self._urls, self._alts = self._parse_images()

    def __len__(self) -> int:
        """Return the number of images."""
        return len(self.images)

    def __str__(self) -> str:
        """Return the string representation of the object."""
        return f"DocImages(images={len(self.images)})"

    def __repr__(self) -> str:
        """Return the string representation of the object."""
        return f"DocImages(images={len(self.images)})"

    def __add__(self, other: DocImages) -> DocImages:
        """Concatenate two DocImages objects.

        Raises
        ------
        TypeError
            If ``other`` is not a DocImages instance.
        """
        if isinstance(other, DocImages):
            return DocImages(self.images + other.images)
        msg = f"unsupported operand type(s) for +: 'DocImages' and '{type(other)}'"
        raise TypeError(msg)

    def __hash__(self) -> int:
        """Return the hash value of the object."""
        return hash(tuple(self.images))

    def __eq__(self, other: object) -> bool:
        """Check if two DocImages objects are equal."""
        # Let Python fall back to its default handling for foreign types
        # instead of failing with an AttributeError.
        if not isinstance(other, DocImages):
            return NotImplemented
        return self.images == other.images

    def __getitem__(self, idx: int) -> str:
        """Return the image url at the specified index."""
        return self.images[idx][0]

    def _parse_images(self) -> tuple[list[str], list[str]]:
        """Split the ``(url, alt)`` pairs into a list of urls and a list of alts."""
        if not self.images:
            return [], []
        urls, alts = zip(*self.images)
        # ``zip`` yields tuples; convert so the result matches the declared
        # list types regardless of whether there are images.
        return list(urls), list(alts)

    @property
    def images(self) -> list[tuple[str, str]]:
        """A list of tuples, where each tuple contains the image url and alt text."""
        return self._images

    @property
    def urls(self) -> list[str]:
        """A list of image urls."""
        return self._urls

    @property
    def alts(self) -> list[str]:
        """A list of image alt text."""
        return self._alts

    def where(self, alt: str) -> list[int]:
        """Return the indices of the images with the specified alt text."""
        return [i for i, a in enumerate(self.alts) if a == alt]

    def sel_urls(self, alt: str) -> list[str]:
        """Return the urls of the images with the specified alt text.

        An empty list is returned when no image carries that alt text.
        """
        return [self.urls[i] for i in self.where(alt)]
class CellImages:
    """Collect the PNG images found in the code cell outputs of a notebook."""

    def __init__(
        self,
        notebook_file: Path,
    ) -> None:
        """Read the notebook and extract its output images right away."""
        self._notebook_file = Path(notebook_file)
        self._images = self._extract_images()

    def _extract_images(self) -> list[Image.Image]:
        """Return every ``image/png`` output of the code cells as a PIL image."""
        with self.notebook_file.open(encoding="utf-8") as stream:
            nb = nbformat.read(stream, as_version=4)

        collected = []
        for cell in nb.cells:
            if cell.cell_type != "code":
                continue
            for out in cell.outputs:
                if "data" not in out or "image/png" not in out.data:
                    continue
                # the payload is base64 text; decode it and let PIL parse it
                raw = base64.b64decode(out.data["image/png"])
                collected.append(Image.open(io.BytesIO(raw)))
        return collected

    @property
    def images(self) -> list[Image.Image]:
        """The images extracted from the notebook, in cell order."""
        return self._images

    @property
    def notebook_file(self) -> Path:
        """The notebook file the images were read from."""
        return self._notebook_file

    def __len__(self) -> int:
        """Number of extracted images."""
        return len(self.images)

    def __repr__(self) -> str:
        """Short description including the image count."""
        count = len(self.images)
        return f"CellImages(images={count})"

    def __str__(self) -> str:
        """Short description including the image count."""
        count = len(self.images)
        return f"CellImages(images={count})"

    def __getitem__(self, idx: int) -> Image.Image:
        """Return the image stored at position ``idx``."""
        return self.images[idx]

    def save_images(self, output_dir: Path) -> None:
        """Write all images to ``output_dir`` as ``<notebook stem>_<i>.png``."""
        target_dir = Path(output_dir)
        if not target_dir.exists():
            target_dir.mkdir(parents=True, exist_ok=True)

        stem = self.notebook_file.stem
        for number, picture in enumerate(self.images):
            picture.save(target_dir / f"{stem}_{number}.png")

    def save_image(self, output_file: Path, index: int) -> None:
        """Save a single image to a file.

        Parameters
        ----------
        output_file : Path
            The output file.
        index : int
            The index of the image to save.
        """
        destination = Path(output_file)
        ensure_dir_exists(destination.parent)
        self.images[index].save(destination)
def parse_md_images(markdown_content: str) -> DocImages:
    """Parse the image information (url, alt) from a markdown content.

    Two types of markdown image syntax are supported:

    1. Conventional markdown image syntax: ``![alt](img/xxx.png)``. An
       optional quoted title (``![alt](img/xxx.png "title")``) is ignored.
    2. Myst markdown image/figure syntax: See `Images and figures
       <https://myst-parser.readthedocs.io/en/latest/syntax/images_and_figures.html>`_
       for more details.

    .. warning:: The html image syntax is not supported.

    Parameters
    ----------
    markdown_content : str
        The markdown content.

    Returns
    -------
    images : DocImages
        A DocImages instance, which contains the image url and alt text.
        All conventional markdown images come first, followed by the MyST
        directives, each group in document order.
    """
    images = []

    # case 1 (conventional markdown image syntax): ![alt](img/xxx.png)
    md_pattern = r"!\[(.*?)\]\((.*?)\)"
    # Separates the link destination from an optional trailing quoted title
    # so that the title does not leak into the url.
    target_pattern = r"""\s*(\S.*?)(?:\s+(?:"[^"]*"|'[^']*'))?\s*"""
    for match in re.finditer(md_pattern, markdown_content):
        alt, target = match.groups()
        target_match = re.fullmatch(target_pattern, target)
        url = target_match.group(1) if target_match else target
        images.append((strip_str(url), strip_str(alt)))

    # case 2 (myst markdown image/figure syntax):
    myst_pattern = r"```\{(image|figure)\}\s+(.*?)\n(.*?)```"
    # The value must sit on the ``:alt:`` line or on an indented continuation
    # line that is not itself an option; a plain ``\s*`` would skip the line
    # break and capture the next option as the alt text.
    alt_pattern = r":alt:[ \t]*(?:\n(?![ \t]*:)[ \t]+)?(.*?)\n"
    for match in re.finditer(myst_pattern, markdown_content, re.DOTALL):
        _directive, url, options = match.groups()
        # find alt text
        alt_match = re.search(alt_pattern, options)
        alt = alt_match.group(1).strip() if alt_match else ""
        images.append((strip_str(url), strip_str(alt)))

    return DocImages(images)
def parse_rst_images(rst_content: str) -> DocImages:
    """Parse the images (url, alt) from a reStructuredText content.

    rst image/figure syntax are supported:

    .. code-block:: rst

        .. image:: xxx.png
            :alt: xxxx

        .. figure:: xxx.png
            :alt: xxxx

    Parameters
    ----------
    rst_content : str
        The reStructuredText content.

    Returns
    -------
    images : DocImages
        A DocImages instance, which contains the image url and alt text.
        Directives without an ``:alt:`` option get an empty alt text.
    """
    pattern = r"\.\.\s+(image|figure)::\s+(.*?)\n(?:\s+:.*?:\s*(.*?)\n)*"
    # The value must sit on the ``:alt:`` line or on an indented continuation
    # line that is not itself an option; a plain ``\s*`` would skip the line
    # break and capture the next option as the alt text.
    alt_pattern = r":alt:[ \t]*(?:\n(?![ \t]*:)[ \t]+)?(.*?)\n"

    images = []
    for match in re.finditer(pattern, rst_content, re.DOTALL):
        url = match.group(2).strip()
        # find alt text among the option lines matched with the directive
        alt_match = re.search(alt_pattern, match.group(0))
        alt = alt_match.group(1).strip() if alt_match else ""
        images.append((strip_str(url), strip_str(alt)))

    return DocImages(images)
def strip_str(s: str) -> str:
    """Trim surrounding whitespace and enclosing quote characters.

    Double quotes, single quotes and backticks are removed from both ends,
    in that order, followed by a final whitespace trim.
    """
    cleaned = s.strip()
    for quote in ('"', "'", "`"):
        cleaned = cleaned.strip(quote)
    return cleaned.strip()