index.js 7.24 KB
'use strict'
const url = require('url')
const gitHosts = require('./git-host-info.js')
const GitHost = module.exports = require('./git-host.js')
const LRU = require('lru-cache')
const cache = new LRU({ max: 1000 })

const protocolToRepresentationMap = {
  'git+ssh:': 'sshurl',
  'git+https:': 'https',
  'ssh:': 'sshurl',
  'git:': 'git',
}

function protocolToRepresentation (protocol) {
  return protocolToRepresentationMap[protocol] || protocol.slice(0, -1)
}

const authProtocols = {
  'git:': true,
  'https:': true,
  'git+https:': true,
  'http:': true,
  'git+http:': true,
}

const knownProtocols = Object.keys(gitHosts.byShortcut)
  .concat(['http:', 'https:', 'git:', 'git+ssh:', 'git+https:', 'ssh:'])

module.exports.fromUrl = function (giturl, opts) {
  if (typeof giturl !== 'string') {
    return
  }

  const key = giturl + JSON.stringify(opts || {})

  if (!cache.has(key)) {
    cache.set(key, fromUrl(giturl, opts))
  }

  return cache.get(key)
}

function fromUrl (giturl, opts) {
  if (!giturl) {
    return
  }

  const url = isGitHubShorthand(giturl) ? 'github:' + giturl : correctProtocol(giturl)
  const parsed = parseGitUrl(url)
  if (!parsed) {
    return parsed
  }

  const gitHostShortcut = gitHosts.byShortcut[parsed.protocol]
  const gitHostDomain =
    gitHosts.byDomain[parsed.hostname.startsWith('www.') ?
      parsed.hostname.slice(4) :
      parsed.hostname]
  const gitHostName = gitHostShortcut || gitHostDomain
  if (!gitHostName) {
    return
  }

  const gitHostInfo = gitHosts[gitHostShortcut || gitHostDomain]
  let auth = null
  if (authProtocols[parsed.protocol] && (parsed.username || parsed.password)) {
    auth = `${parsed.username}${parsed.password ? ':' + parsed.password : ''}`
  }

  let committish = null
  let user = null
  let project = null
  let defaultRepresentation = null

  try {
    if (gitHostShortcut) {
      let pathname = parsed.pathname.startsWith('/') ? parsed.pathname.slice(1) : parsed.pathname
      const firstAt = pathname.indexOf('@')
      // we ignore auth for shortcuts, so just trim it out
      if (firstAt > -1) {
        pathname = pathname.slice(firstAt + 1)
      }

      const lastSlash = pathname.lastIndexOf('/')
      if (lastSlash > -1) {
        user = decodeURIComponent(pathname.slice(0, lastSlash))
        // we want nulls only, never empty strings
        if (!user) {
          user = null
        }
        project = decodeURIComponent(pathname.slice(lastSlash + 1))
      } else {
        project = decodeURIComponent(pathname)
      }

      if (project.endsWith('.git')) {
        project = project.slice(0, -4)
      }

      if (parsed.hash) {
        committish = decodeURIComponent(parsed.hash.slice(1))
      }

      defaultRepresentation = 'shortcut'
    } else {
      if (!gitHostInfo.protocols.includes(parsed.protocol)) {
        return
      }

      const segments = gitHostInfo.extract(parsed)
      if (!segments) {
        return
      }

      user = segments.user && decodeURIComponent(segments.user)
      project = decodeURIComponent(segments.project)
      committish = decodeURIComponent(segments.committish)
      defaultRepresentation = protocolToRepresentation(parsed.protocol)
    }
  } catch (err) {
    /* istanbul ignore else */
    if (err instanceof URIError) {
      return
    } else {
      throw err
    }
  }

  return new GitHost(gitHostName, user, auth, project, committish, defaultRepresentation, opts)
}

// accepts input like git:github.com:user/repo and inserts the // after the first :
const correctProtocol = (arg) => {
  const firstColon = arg.indexOf(':')
  const proto = arg.slice(0, firstColon + 1)
  if (knownProtocols.includes(proto)) {
    return arg
  }

  const firstAt = arg.indexOf('@')
  if (firstAt > -1) {
    if (firstAt > firstColon) {
      return `git+ssh://${arg}`
    } else {
      return arg
    }
  }

  const doubleSlash = arg.indexOf('//')
  if (doubleSlash === firstColon + 1) {
    return arg
  }

  return arg.slice(0, firstColon + 1) + '//' + arg.slice(firstColon + 1)
}

// look for github shorthand inputs, such as npm/cli
const isGitHubShorthand = (arg) => {
  // it cannot contain whitespace before the first #
  // it cannot start with a / because that's probably an absolute file path
  // but it must include a slash since repos are username/repository
  // it cannot start with a . because that's probably a relative file path
  // it cannot start with an @ because that's a scoped package if it passes the other tests
  // it cannot contain a : before a # because that tells us that there's a protocol
  // a second / may not exist before a #
  const firstHash = arg.indexOf('#')
  const firstSlash = arg.indexOf('/')
  const secondSlash = arg.indexOf('/', firstSlash + 1)
  const firstColon = arg.indexOf(':')
  const firstSpace = /\s/.exec(arg)
  const firstAt = arg.indexOf('@')

  const spaceOnlyAfterHash = !firstSpace || (firstHash > -1 && firstSpace.index > firstHash)
  const atOnlyAfterHash = firstAt === -1 || (firstHash > -1 && firstAt > firstHash)
  const colonOnlyAfterHash = firstColon === -1 || (firstHash > -1 && firstColon > firstHash)
  const secondSlashOnlyAfterHash = secondSlash === -1 || (firstHash > -1 && secondSlash > firstHash)
  const hasSlash = firstSlash > 0
  // if a # is found, what we really want to know is that the character
  // immediately before # is not a /
  const doesNotEndWithSlash = firstHash > -1 ? arg[firstHash - 1] !== '/' : !arg.endsWith('/')
  const doesNotStartWithDot = !arg.startsWith('.')

  return spaceOnlyAfterHash && hasSlash && doesNotEndWithSlash &&
    doesNotStartWithDot && atOnlyAfterHash && colonOnlyAfterHash &&
    secondSlashOnlyAfterHash
}

// attempt to correct an scp style url so that it will parse with `new URL()`
const correctUrl = (giturl) => {
  const firstAt = giturl.indexOf('@')
  const lastHash = giturl.lastIndexOf('#')
  let firstColon = giturl.indexOf(':')
  let lastColon = giturl.lastIndexOf(':', lastHash > -1 ? lastHash : Infinity)

  let corrected
  if (lastColon > firstAt) {
    // the last : comes after the first @ (or there is no @)
    // like it would in:
    // proto://hostname.com:user/repo
    // username@hostname.com:user/repo
    // :password@hostname.com:user/repo
    // username:password@hostname.com:user/repo
    // proto://username@hostname.com:user/repo
    // proto://:password@hostname.com:user/repo
    // proto://username:password@hostname.com:user/repo
    // then we replace the last : with a / to create a valid path
    corrected = giturl.slice(0, lastColon) + '/' + giturl.slice(lastColon + 1)
    // // and we find our new : positions
    firstColon = corrected.indexOf(':')
    lastColon = corrected.lastIndexOf(':')
  }

  if (firstColon === -1 && giturl.indexOf('//') === -1) {
    // we have no : at all
    // as it would be in:
    // username@hostname.com/user/repo
    // then we prepend a protocol
    corrected = `git+ssh://${corrected}`
  }

  return corrected
}

// try to parse the url as its given to us, if that throws
// then we try to clean the url and parse that result instead
// THIS FUNCTION SHOULD NEVER THROW
const parseGitUrl = (giturl) => {
  let result
  try {
    result = new url.URL(giturl)
  } catch (err) {}

  if (result) {
    return result
  }

  const correctedUrl = correctUrl(giturl)
  try {
    result = new url.URL(correctedUrl)
  } catch (err) {}

  return result
}