"""Support for parsing GitHub URLs (which might be user provided) into constituent parts."""

from __future__ import annotations

import re
from dataclasses import dataclass
from enum import IntEnum

from wandb.sdk.launch.errors import LaunchError

# Literal fragments of remote URLs/URIs. They are not referenced in the visible
# part of this module; presumably used when classifying or normalizing
# user-provided remotes — TODO confirm against callers.
PREFIX_HTTPS = "https://"
PREFIX_SSH = "git@"
SUFFIX_GIT = ".git"


# Matches a run of exactly 40 lowercase hexadecimal digits (the length of a full
# SHA-1 git commit hash). The pattern is not anchored, so whether it validates a
# whole string depends on the caller using fullmatch() rather than search()/match().
GIT_COMMIT_REGEX = re.compile(r"[0-9a-f]{40}")


class ReferenceType(IntEnum):
    """Kinds of git reference, as integer-valued enum members.

    NOTE(review): not referenced in the visible part of this module; presumably
    used to record whether a ref string names a branch or a commit — confirm
    against callers.
    """

    BRANCH = 1
    COMMIT = 2


def _parse_netloc(netloc: str) -> tuple[str | None, str | None, str]:
    """Parse netloc into username, password, and host.

    github.com => None, None, "@github.com"
    username@github.com => "username", None, "github.com"
    username:password@github.com => "username", "password", "github.com"
    """
    parts = netloc.split("@", 1)
    if len(parts) == 1:
        return None, None, parts[0]
    auth, host = parts
    parts = auth.split(":", 1)
    if len(parts) == 1:
        return parts[0], None, host
    return parts[0], parts[1], host


@dataclass
class GitReference:
    """A git remote plus an optional ref that can be fetched into a directory.

    ``__init__`` sets ``uri`` and ``ref``. ``fetch`` additionally sets ``path``
    (once the local repo is initialized), ``commit_hash`` (after checkout) and,
    only when no ref was requested, ``default_branch``; those attributes do not
    exist before ``fetch`` is called.
    """

    # NOTE(review): no dataclass fields are declared, so the __eq__ and __repr__
    # generated by @dataclass ignore uri/ref (all instances compare equal and the
    # repr is "GitReference()"). Confirm the decorator is intentional.

    def __init__(self, remote: str, ref: str | None = None) -> None:
        """Initialize a reference from a remote and ref.

        Arguments:
            remote: A remote URL or URI.
            ref: A branch, tag, or commit hash.
        """
        self.uri = remote
        self.ref = ref

    @property
    def url(self) -> str | None:
        """Return the remote exactly as passed to the constructor (alias of ``uri``)."""
        return self.uri

    def fetch(self, dst_dir: str) -> None:
        """Fetch the repo into dst_dir and refine this reference based on what we learn.

        Initializes a git repository in ``dst_dir``, adds the remote as "origin",
        fetches it, checks out ``self.ref`` (or "main"/"master" when no ref was
        given, with "main" taking precedence) and updates submodules recursively.
        On success ``self.commit_hash`` holds the checked-out commit's hex SHA and,
        if no ref was given, ``self.ref``/``self.default_branch`` hold the branch
        that was chosen.

        Arguments:
            dst_dir: Directory in which the repository is initialized.

        Raises:
            LaunchError: If fetching from the remote fails, or if no ref was given
                and neither a "main" nor a "master" reference is found.
        """
        # We defer importing git until the last moment, because the import requires that the git
        # executable is available on the PATH, so we only want to fail if we actually need it.
        import git  # type: ignore

        repo = git.Repo.init(dst_dir)
        self.path = repo.working_dir
        origin = repo.create_remote("origin", self.uri or "")

        try:
            # We fetch the origin so that we have branch and tag references
            origin.fetch()
        except git.exc.GitCommandError as e:
            # Chain explicitly so the underlying git failure is kept as __cause__.
            raise LaunchError(
                f"Unable to fetch from git remote repository {self.url}:\n{e}"
            ) from e

        start_point: git.RemoteReference | str
        if self.ref:
            # Prefer the remote-tracking reference when origin has one by this name;
            # otherwise hand the raw string (e.g. a commit hash) to git as-is.
            if self.ref in origin.refs:
                start_point = origin.refs[self.ref]
            else:
                start_point = self.ref
            head = repo.create_head(self.ref, start_point)
        else:
            # TODO: Is there a better way to do this?
            origin_prefix = "origin/"
            default_branch = None
            for candidate in repo.references:
                if hasattr(candidate, "tag"):  # Skip tag references
                    continue
                refname = candidate.name
                if refname.startswith(origin_prefix):  # Trim off "origin/"
                    refname = refname[len(origin_prefix):]
                if refname == "main":
                    default_branch = "main"
                    break
                if refname == "master":
                    default_branch = "master"
                    # Keep looking in case we also have a main, which we let take precedence
                    # (While the references appear to be sorted, not clear if that's guaranteed.)
            if not default_branch:
                raise LaunchError(
                    f"Unable to determine branch or commit to checkout from {self.url}"
                )
            self.default_branch = default_branch
            self.ref = default_branch
            head = repo.create_head(default_branch, origin.refs[default_branch])

        head.checkout()
        self.commit_hash = head.commit.hexsha
        repo.submodule_update(init=True, recursive=True)
