Skip to content

core

ruff_sync.core

Core logic for ruff-sync.

__all__ module-attribute

# Names exported by `from ruff_sync.core import *`; this list is the module's
# declared public API.
__all__ = [
    "Config",
    "FetchResult",
    "check",
    "fetch_upstream_config",
    "get_ruff_config",
    "get_ruff_tool_table",
    "is_ruff_toml_file",
    "merge_ruff_toml",
    "pull",
    "resolve_raw_url",
    "resolve_target_path",
    "to_git_url",
    "toml_ruff_parse",
]

LOGGER module-attribute

# Module-level logger, named after this module via __name__.
LOGGER = getLogger(__name__)

get_ruff_tool_table module-attribute

# Second name bound to the very same function object as get_ruff_config
# (presumably kept as a backward-compatible alias — confirm before removing).
get_ruff_tool_table = get_ruff_config

FetchResult

Bases: NamedTuple

Result of fetching an upstream configuration.

Source code in src/ruff_sync/core.py
class FetchResult(NamedTuple):
    """Result of fetching an upstream configuration.

    Returned by ``fetch_upstream_config``. ``check`` and ``pull`` read the
    configuration text from ``buffer`` and inspect ``resolved_upstream`` to
    decide whether the upstream file is a ruff.toml or a pyproject.toml.
    """

    # In-memory text buffer holding the fetched configuration file contents.
    buffer: StringIO
    # URL the configuration was loaded from (presumably after any repo/blob URL
    # resolution or file discovery — confirm against the fetch helpers).
    resolved_upstream: URL

buffer instance-attribute

buffer

resolved_upstream instance-attribute

resolved_upstream

Config

Bases: TypedDict

Configuration schema for [tool.ruff-sync] in pyproject.toml.

Source code in src/ruff_sync/core.py
class Config(TypedDict, total=False):
    """Configuration schema for [tool.ruff-sync] in pyproject.toml.

    Declared with ``total=False``, so every key is optional.

    NOTE(review): the per-key notes below are inferred from the key names and
    from how the same-named ``Arguments`` attributes are used by ``check`` and
    ``pull``; confirm against the CLI before relying on them.
    """

    upstream: str  # Location (URL) of the upstream configuration.
    to: str  # Local target: a config file, or a directory to search for one.
    source: str  # Deprecated
    exclude: list[str]  # Config keys/sections to leave out when syncing.
    verbose: int  # Verbosity level — presumably a count of -v flags; TODO confirm.
    branch: str  # Branch used when resolving repository URLs.
    path: str  # Directory prefix of the config file inside the upstream repo.
    semantic: bool  # Compare parsed values instead of raw text in `check`.
    diff: bool  # Print a unified diff when `check` finds a mismatch.
    init: bool  # Allow `pull` to create the target file when it is missing.

upstream instance-attribute

upstream

to instance-attribute

to

source instance-attribute

source

exclude instance-attribute

exclude

verbose instance-attribute

verbose

branch instance-attribute

branch

path instance-attribute

path

semantic instance-attribute

semantic

diff instance-attribute

diff

init instance-attribute

init

resolve_target_path

resolve_target_path(to, upstream_url=None)

Resolve the target path for configuration files.

If 'to' is a file, it's used directly. Otherwise, it looks for existing ruff/pyproject.toml in the 'to' directory. If none found, it defaults to pyproject.toml unless the upstream is a ruff.toml.

Source code in src/ruff_sync/core.py
def resolve_target_path(to: pathlib.Path, upstream_url: str | URL | None = None) -> pathlib.Path:
    """Resolve the target path for configuration files.

    Resolution order:

    1. If ``to`` is an existing file, it is used directly.
    2. If ``to`` is not an existing directory but is *named* like a ruff
       configuration file (``ruff.toml``, ``.ruff.toml`` or ``pyproject.toml``),
       it is the file that is about to be created and is returned as-is.
    3. Otherwise ``to`` is treated as a directory and the first existing
       ``ruff.toml``, ``.ruff.toml`` or ``pyproject.toml`` inside it is returned.
    4. If none exists, the default is ``pyproject.toml`` inside ``to``, unless
       the upstream is a ruff.toml, in which case it is ``ruff.toml``.

    Args:
        to: A configuration file path or a directory to search.
        upstream_url: Optional upstream location; only used to pick the default
            file name when no configuration file exists yet.

    Returns:
        The path of the configuration file to read or create. The path is not
        required to exist.
    """
    config_names = ("ruff.toml", ".ruff.toml", "pyproject.toml")

    if to.is_file():
        return to

    # A config file that does not exist yet (e.g. a missing ``pyproject.toml``
    # passed as the target) must not be mistaken for a directory; otherwise the
    # result would be a nonsensical "pyproject.toml/pyproject.toml".
    if to.name in config_names and not to.is_dir():
        return to

    # If it's a directory, look for common config files
    for filename in config_names:
        candidate = to / filename
        if candidate.exists():
            return candidate

    # If upstream is specified and is a ruff.toml, default to ruff.toml
    if upstream_url and is_ruff_toml_file(upstream_url):
        return to / "ruff.toml"

    return to / "pyproject.toml"

is_git_url

is_git_url(url)

Return True if the URL should be treated as a git repository.

Source code in src/ruff_sync/core.py
def is_git_url(url: URL) -> bool:
    """Tell whether a URL has to be fetched with git rather than plain HTTP.

    A URL qualifies when its string form is an SCP-style address (``git@...``)
    or when its scheme is one of ``ssh``, ``git`` or ``git+ssh``.
    """
    if str(url).startswith("git@"):
        return True
    git_schemes = ("ssh", "git", "git+ssh")
    return url.scheme in git_schemes

to_git_url

to_git_url(url)

Attempt to convert a browser or raw URL to a git (SSH) URL.

Supports GitHub and GitLab.

Source code in src/ruff_sync/core.py
def to_git_url(url: URL) -> URL | None:
    """Derive an SSH clone URL from a GitHub or GitLab browser/raw URL.

    URLs that already are git URLs come back unchanged. ``None`` is returned
    when the host is not a known GitHub/GitLab host or when no repository path
    can be extracted from the URL.
    """
    if is_git_url(url):
        return url

    host = url.host

    if host in _GITHUB_HOSTS or host == _GITHUB_RAW_HOST:
        # Non-empty path segments; the leading ones are "<org>/<repo>".
        segments = list(filter(None, url.path.split("/")))
        if len(segments) >= _GITHUB_REPO_PATH_PARTS_COUNT:
            owner, name = segments[:_GITHUB_REPO_PATH_PARTS_COUNT]
            name = name.removesuffix(".git")
            return URL(f"git@github.com:{owner}/{name}.git")

    if host in _GITLAB_HOSTS:
        # Everything before the first "/-/" separator is the project path.
        project = url.path.strip("/").partition("/-/")[0]
        if project:
            project = project.removesuffix(".git")
            return URL(f"git@{host}:{project}.git")

    return None

resolve_raw_url

resolve_raw_url(url, branch='main', path=None)

Convert a GitHub or GitLab repository/blob URL to a raw content URL.

Parameters:

Name Type Description Default
url URL

The URL to resolve.

required
branch str

The default branch to use for repo URLs.

'main'
path str | None

The directory prefix for pyproject.toml.

None

Returns:

Name Type Description
URL URL

The resolved raw content URL, or the original URL if no conversion applies.

Source code in src/ruff_sync/core.py
def resolve_raw_url(url: URL, branch: str = "main", path: str | None = None) -> URL:
    """Turn a GitHub or GitLab repository/blob URL into a raw content URL.

    Args:
        url (URL): The URL to resolve.
        branch (str): The default branch to use for repo URLs.
        path (str | None): The directory prefix for pyproject.toml.

    Returns:
        URL: The resolved raw content URL, or the original URL if no conversion applies.

    """
    # Git URLs are fetched by cloning, so they are passed through untouched.
    if is_git_url(url):
        return url

    LOGGER.debug(f"Initial URL: {url}")
    prefix = path or ""
    converters = (
        (_GITHUB_HOSTS, _convert_github_url),
        (_GITLAB_HOSTS, _convert_gitlab_url),
    )
    for known_hosts, convert in converters:
        if url.host in known_hosts:
            return convert(url, branch=branch, path=prefix)
    return url

download async

download(url, client)

Download a file from a URL and return a StringIO object.

Source code in src/ruff_sync/core.py
async def download(url: URL, client: httpx.AsyncClient) -> StringIO:
    """Fetch ``url`` with ``client`` and wrap the response text in a ``StringIO``.

    Raises whatever ``response.raise_for_status()`` raises for error statuses.
    """
    resp = await client.get(url)
    resp.raise_for_status()
    body = resp.text
    return StringIO(body)

fetch_upstream_config async

fetch_upstream_config(url, client, branch, path)

Fetch the upstream pyproject.toml either via HTTP or git clone.

Source code in src/ruff_sync/core.py
async def fetch_upstream_config(
    url: URL, client: httpx.AsyncClient, branch: str, path: str | None
) -> FetchResult:
    """Retrieve the upstream configuration, by git clone or by HTTP download.

    Git URLs are fetched in a worker thread; any other URL is downloaded with
    ``client``.

    Raises:
        httpx.HTTPStatusError: When the HTTP download fails with an error status.
            The message carries a troubleshooting hint and, when the URL can be
            converted to a git URL, a suggested ``ruff-sync`` command.
    """
    if is_git_url(url):
        LOGGER.info(f"Cloning {url} via git...")
        return await asyncio.to_thread(_fetch_via_git, url, branch, path)

    try:
        return await _download_with_discovery(url, client, branch)
    except httpx.HTTPStatusError as err:
        pieces = [f"HTTP error {err.response.status_code} when downloading from {url}"]
        ssh_url = to_git_url(url)
        if not ssh_url:
            pieces.append("\n\n💡 Check the URL and your permissions.")
        else:
            # The first CLI argument may be a flag (e.g. -v under pytest) or be
            # absent altogether; anything but a known subcommand becomes "pull".
            argv = sys.argv
            known = len(argv) > 1 and argv[1] in ("pull", "check")
            subcommand = argv[1] if known else "pull"
            pieces.append(
                f"\n\n💡 Check the URL and your permissions. "
                "You might want to try cloning via git instead:\n\n"
                f"   ruff-sync {subcommand} {ssh_url}"
            )

        # Raise a fresh error carrying the friendlier message; `from None` keeps
        # the original traceback out of the displayed chain.
        raise httpx.HTTPStatusError(
            "".join(pieces), request=err.request, response=err.response
        ) from None

is_ruff_toml_file

is_ruff_toml_file(path_or_url)

Return True if the path or URL indicates a ruff.toml file.

This handles:

- Plain paths (e.g. "ruff.toml", ".ruff.toml", "configs/ruff.toml")
- URLs with query strings or fragments (e.g. "ruff.toml?ref=main", "ruff.toml#L10")

In both cases only the path component (or the part before any query/fragment) is examined.

Source code in src/ruff_sync/core.py
def is_ruff_toml_file(path_or_url: str | URL) -> bool:
    """Report whether a path or URL names a ``ruff.toml`` / ``.ruff.toml`` file.

    Only the final path segment is inspected, so these all count:
    - Plain paths (e.g. "ruff.toml", ".ruff.toml", "configs/ruff.toml")
    - URLs with query strings or fragments (e.g. "ruff.toml?ref=main", "ruff.toml#L10")
    """
    raw = str(path_or_url)
    parts = urlparse(raw)

    if parts.scheme or parts.netloc:
        # A real URL: the parser has already split off query and fragment.
        candidate = parts.path
    else:
        # A bare path: drop anything from the first "?" or "#" onwards.
        candidate = raw.partition("?")[0].partition("#")[0]

    filename = pathlib.Path(candidate).name
    return filename in ("ruff.toml", ".ruff.toml")

get_ruff_config

get_ruff_config(
    toml: str | TOMLDocument,
    is_ruff_toml: bool = ...,
    create_if_missing: Literal[True] = ...,
    exclude: Iterable[str] = ...,
) -> TOMLDocument | Table
get_ruff_config(
    toml: str | TOMLDocument,
    is_ruff_toml: bool = ...,
    create_if_missing: Literal[False] = ...,
    exclude: Iterable[str] = ...,
) -> TOMLDocument | Table | None
get_ruff_config(
    toml,
    is_ruff_toml=False,
    create_if_missing=True,
    exclude=(),
)

Get the ruff section or document from a TOML string.

If it does not exist and it is a pyproject.toml, create it.

Source code in src/ruff_sync/core.py
def get_ruff_config(
    toml: str | TOMLDocument,
    is_ruff_toml: bool = False,
    create_if_missing: bool = True,
    exclude: Iterable[str] = (),
) -> TOMLDocument | Table | None:
    """Get the ruff section or document from TOML content.

    If it does not exist and it is a pyproject.toml, create it.

    Args:
        toml: TOML text to parse, or an already parsed document. A parsed
            document is used as-is (not copied).
        is_ruff_toml: When true, the whole document is the ruff configuration
            and is returned; otherwise the ``tool.ruff`` table is looked up.
        create_if_missing: What to do when the ``tool.ruff`` lookup fails:
            append a new ``tool.ruff`` table to the document (true) or return
            ``None`` (false).
        exclude: Passed to ``_apply_exclusions`` together with the document or
            table that is about to be returned.

    Returns:
        The whole document when ``is_ruff_toml`` is true, otherwise the
        ``tool.ruff`` table, or ``None`` when that table is missing and
        ``create_if_missing`` is false.

    Raises:
        TypeError: If ``tool.ruff`` exists but is not a table.
    """
    if isinstance(toml, str):
        doc: TOMLDocument = tomlkit.parse(toml)
    else:
        doc = toml

    # In a ruff.toml the settings live at the top level, so the document itself
    # is the ruff configuration.
    if is_ruff_toml:
        _apply_exclusions(doc, exclude)
        return doc

    try:
        tool: Table = doc["tool"]  # type: ignore[assignment]
        ruff = tool["ruff"]
        LOGGER.debug("Found `tool.ruff` section.")
    except KeyError:
        # Reached when either `tool` or `tool.ruff` is missing.
        if not create_if_missing:
            return None
        LOGGER.info("✨ No `tool.ruff` section found, creating it.")
        # presumably `table(True)` marks `tool` as a super table so that no bare
        # `[tool]` header is rendered — confirm against the tomlkit docs.
        tool = table(True)
        ruff = table()
        tool.append("ruff", ruff)
        doc.append("tool", tool)
    if not isinstance(ruff, Table):
        msg = f"Expected table, got {type(ruff)}"
        raise TypeError(msg)
    _apply_exclusions(ruff, exclude)
    return ruff

toml_ruff_parse

toml_ruff_parse(toml_s, exclude)

Parse a TOML string for the tool.ruff section excluding certain ruff configs.

Source code in src/ruff_sync/core.py
def toml_ruff_parse(toml_s: str, exclude: Iterable[str]) -> TOMLDocument:
    """Parse a TOML string for the tool.ruff section excluding certain ruff configs.

    Args:
        toml_s: Text of a pyproject.toml-style document containing ``tool.ruff``.
        exclude: Names of entries to drop from the ``tool.ruff.lint`` table.
            Names that are not present are ignored.

    Returns:
        The ``tool.ruff`` table with the excluded ``lint`` entries removed.

    Raises:
        KeyError: tomlkit's missing-key error (caught as ``KeyError`` elsewhere
            in this module) when the document has no ``tool.ruff`` section.
    """
    ruff_toml: TOMLDocument = tomlkit.parse(toml_s)["tool"]["ruff"]  # type: ignore[index,assignment]
    # An upstream config without a `lint` table is valid; looking it up with
    # `get` avoids a missing-key error when exclusions are requested anyway.
    lint_table = ruff_toml.get("lint")
    for section in exclude:
        LOGGER.info(f"Excluding section `lint.{section}` from ruff config.")
        if lint_table is not None:
            lint_table.pop(section, None)  # type: ignore[union-attr]
    return ruff_toml

merge_ruff_toml

merge_ruff_toml(
    source, upstream_ruff_doc, is_ruff_toml=False
)

Merge the source and upstream tool ruff config with better whitespace preservation.

Examples:

>>> from tomlkit import parse
>>> source = parse("[tool.ruff]\nline-length = 80")
>>> upstream = parse("[tool.ruff]\nline-length = 100")["tool"]["ruff"]
>>> merged = merge_ruff_toml(source, upstream)
>>> print(merged.as_string())
[tool.ruff]
line-length = 100
Source code in src/ruff_sync/core.py
def merge_ruff_toml(
    source: TOMLDocument,
    upstream_ruff_doc: TOMLDocument | Table | None,
    is_ruff_toml: bool = False,
) -> TOMLDocument:
    r"""Merge the source and upstream tool ruff config with better whitespace preservation.

    Args:
        source: The local document. The same object is returned; the merge is
            applied to it through ``_recursive_update`` (whose return value is
            not used), so callers wanting to keep the original must pass a copy.
        upstream_ruff_doc: The upstream ruff configuration. When it is ``None``
            or otherwise falsy, a warning is logged and ``source`` is returned
            untouched.
        is_ruff_toml: When true, ``source`` is a ruff.toml and the upstream
            settings are merged at the top level; otherwise they are merged into
            its ``tool.ruff`` table, which is created if missing.

    Returns:
        The ``source`` document.

    Examples:
        >>> from tomlkit import parse
        >>> source = parse("[tool.ruff]\nline-length = 80")
        >>> upstream = parse("[tool.ruff]\nline-length = 100")["tool"]["ruff"]
        >>> merged = merge_ruff_toml(source, upstream)
        >>> print(merged.as_string())
        [tool.ruff]
        line-length = 100
    """
    if not upstream_ruff_doc:
        LOGGER.warning("No upstream ruff config section found.")
        return source

    if is_ruff_toml:
        _recursive_update(source, upstream_ruff_doc)
        return source

    source_tool_ruff = get_ruff_config(source, create_if_missing=True)

    _recursive_update(source_tool_ruff, upstream_ruff_doc)

    # Add a blank separator line after the ruff section — but only when another
    # top-level section follows it. Adding \n\n at end-of-file is unnecessary.
    doc_str = source.as_string()
    ruff_start = doc_str.find("[tool.ruff]")
    # Look for any non-ruff top-level section header after [tool.ruff]
    # NOTE(review): the lookahead rejects every header that merely *starts* with
    # `tool.ruff`, so a following `[tool.ruff-sync]` is not counted as a
    # "non-ruff" section either — confirm that this is intended.
    ruff_is_last = ruff_start == -1 or not re.search(
        r"^\[(?!tool\.ruff)", doc_str[ruff_start:], re.MULTILINE
    )
    # Only append the separator when the section does not already end in a blank line.
    if not ruff_is_last and not source_tool_ruff.as_string().endswith("\n\n"):
        source_tool_ruff.add(tomlkit.nl())

    return source

check async

check(args)

Check if the local pyproject.toml / ruff.toml is in sync with the upstream.

Returns:

Name Type Description
int int

0 if in sync, 1 if out of sync.

Examples:

>>> import asyncio
>>> from ruff_sync.cli import Arguments
>>> from httpx import URL
>>> import pathlib
>>> args = Arguments(
...     command="check",
...     upstream=URL("https://github.com/org/repo/blob/main/pyproject.toml"),
...     to=pathlib.Path("pyproject.toml"),
...     exclude=[],
... )
>>> # asyncio.run(check(args))
Source code in src/ruff_sync/core.py
async def check(
    args: Arguments,
) -> int:
    """Report whether the local ruff configuration matches the upstream one.

    The upstream configuration is merged into a copy of the local document and
    the result is compared with the local document: either as raw text or, with
    ``args.semantic``, as parsed values. With ``args.diff`` a unified diff is
    written to stdout when they differ. Nothing is written to disk.

    Returns:
        int: 0 if in sync, 1 if out of sync.

    Examples:
        >>> import asyncio
        >>> from ruff_sync.cli import Arguments
        >>> from httpx import URL
        >>> import pathlib
        >>> args = Arguments(
        ...     command="check",
        ...     upstream=URL("https://github.com/org/repo/blob/main/pyproject.toml"),
        ...     to=pathlib.Path("pyproject.toml"),
        ...     exclude=[],
        ... )
        >>> # asyncio.run(check(args))
    """
    print("🔍 Checking Ruff sync status...")

    target = resolve_target_path(args.to, args.upstream).resolve(strict=False)
    if not target.exists():
        print(
            f"❌ Configuration file {target} does not exist. "
            "Run 'ruff-sync pull' to create it."
        )
        return 1

    local_doc = TOMLFile(target).read()

    async with httpx.AsyncClient() as client:
        fetched = await fetch_upstream_config(
            args.upstream, client, branch=args.branch, path=args.path
        )
        LOGGER.info(f"Loaded upstream file from {fetched.resolved_upstream}")

    upstream_is_ruff_toml = is_ruff_toml_file(fetched.resolved_upstream)
    local_is_ruff_toml = is_ruff_toml_file(target.name)

    upstream_section = get_ruff_config(
        fetched.buffer.read(),
        is_ruff_toml=upstream_is_ruff_toml,
        create_if_missing=False,
        exclude=args.exclude,
    )

    # Merge into a re-parsed copy so the document read from disk stays untouched
    # and can serve as the "before" side of the comparison.
    expected_doc = merge_ruff_toml(
        tomlkit.parse(local_doc.as_string()),
        upstream_section,
        is_ruff_toml=local_is_ruff_toml,
    )

    if args.semantic:
        if local_is_ruff_toml:
            local_ruff, expected_ruff = local_doc, expected_doc
        else:
            local_ruff = local_doc.get("tool", {}).get("ruff")
            expected_ruff = expected_doc.get("tool", {}).get("ruff")

        # Compare plain Python values rather than formatting-aware TOML items.
        local_val = None if local_ruff is None else local_ruff.unwrap()
        expected_val = None if expected_ruff is None else expected_ruff.unwrap()
        in_sync = local_val == expected_val
        success_message = "✅ Ruff configuration is semantically in sync."
    else:
        in_sync = local_doc.as_string() == expected_doc.as_string()
        success_message = "✅ Ruff configuration is in sync."

    if in_sync:
        print(success_message)
        return 0

    try:
        shown_path = target.relative_to(pathlib.Path.cwd())
    except ValueError:
        # The target lives outside the working directory; show it in full.
        shown_path = target
    print(f"❌ Ruff configuration at {shown_path} is out of sync!")

    if not args.diff:
        return 1

    if args.semantic:
        # Semantic diff of the managed section only.
        before = json.dumps(local_val, indent=2, sort_keys=True).splitlines(keepends=True)
        after = json.dumps(expected_val, indent=2, sort_keys=True).splitlines(keepends=True)
        before_label = "local (semantic)"
        after_label = "upstream (semantic)"
    else:
        # Text diff of the whole file.
        before = local_doc.as_string().splitlines(keepends=True)
        after = expected_doc.as_string().splitlines(keepends=True)
        before_label = f"local/{target.name}"
        after_label = f"upstream/{target.name}"

    sys.stdout.writelines(
        difflib.unified_diff(before, after, fromfile=before_label, tofile=after_label)
    )
    return 1

pull async

pull(args)

Pull the upstream ruff config and apply it to the source.

Returns:

Name Type Description
int int

0 on success, 1 on failure.

Examples:

>>> import asyncio
>>> from ruff_sync.cli import Arguments
>>> from httpx import URL
>>> import pathlib
>>> args = Arguments(
...     command="pull",
...     upstream=URL("https://github.com/org/repo/blob/main/pyproject.toml"),
...     to=pathlib.Path("pyproject.toml"),
...     exclude=["lint.isort"],
...     init=True,
... )
>>> # asyncio.run(pull(args))
Source code in src/ruff_sync/core.py
async def pull(
    args: Arguments,
) -> int:
    """Fetch the upstream ruff configuration and merge it into the local file.

    The target file must already exist unless ``args.init`` is set, in which
    case it (and any missing parent directory) is created before the upstream
    is fetched.

    Returns:
        int: 0 on success, 1 on failure.

    Examples:
        >>> import asyncio
        >>> from ruff_sync.cli import Arguments
        >>> from httpx import URL
        >>> import pathlib
        >>> args = Arguments(
        ...     command="pull",
        ...     upstream=URL("https://github.com/org/repo/blob/main/pyproject.toml"),
        ...     to=pathlib.Path("pyproject.toml"),
        ...     exclude=["lint.isort"],
        ...     init=True,
        ... )
        >>> # asyncio.run(pull(args))
    """
    print("🔄 Syncing Ruff...")
    target = resolve_target_path(args.to, args.upstream).resolve(strict=False)
    toml_file = TOMLFile(target)

    if target.exists():
        local_doc = toml_file.read()
    elif not args.init:
        print(
            f"❌ Configuration file {target} does not exist. "
            "Pass the '--init' flag to create it."
        )
        return 1
    else:
        LOGGER.info(f"✨ Target file {target} does not exist, creating it.")
        local_doc = tomlkit.document()
        # Create the empty file up front so that an unwritable location is
        # reported before any time is spent fetching the upstream.
        try:
            target.parent.mkdir(parents=True, exist_ok=True)
            target.touch()
        except OSError as exc:
            print(f"❌ Failed to create {target}: {exc}", file=sys.stderr)
            return 1

    # NOTE: async is not essential here; a single request is awaited.
    async with httpx.AsyncClient() as client:
        fetched = await fetch_upstream_config(
            args.upstream, client, branch=args.branch, path=args.path
        )
        LOGGER.info(f"Loaded upstream file from {fetched.resolved_upstream}")

    upstream_is_ruff_toml = is_ruff_toml_file(fetched.resolved_upstream)
    local_is_ruff_toml = is_ruff_toml_file(target.name)

    upstream_section = get_ruff_config(
        fetched.buffer.read(),
        is_ruff_toml=upstream_is_ruff_toml,
        create_if_missing=False,
        exclude=args.exclude,
    )
    merged = merge_ruff_toml(
        local_doc,
        upstream_section,
        is_ruff_toml=local_is_ruff_toml,
    )
    toml_file.write(merged)

    absolute = target.resolve()
    try:
        shown_path = absolute.relative_to(pathlib.Path.cwd())
    except ValueError:
        # The target lives outside the working directory; show it in full.
        shown_path = absolute
    print(f"✅ Updated {shown_path}")
    return 0