diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index e7ed4b05f..18dc61af5 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -57,9 +57,12 @@ jobs:
           - '@percy/sdk-utils'
           - '@percy/webdriver-utils'
           - '@percy/monitoring'
+          - '@percy/git-utils'
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v5
+        with:
+          fetch-depth: ${{ matrix.package != '@percy/git-utils' && 1 || 0 }} # 0 = full history, needed by the git-utils tests; "cond && 0 || 1" always yields 1 because 0 is falsy
       - uses: actions/setup-node@v3
         with:
           node-version: ${{ matrix.node }}
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index eb7803054..0da249d10 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -56,9 +56,12 @@ jobs:
           - '@percy/sdk-utils'
           - '@percy/webdriver-utils'
           - '@percy/monitoring'
+          - '@percy/git-utils'
     runs-on: windows-latest
     steps:
       - uses: actions/checkout@v5
+        with:
+          fetch-depth: ${{ matrix.package != '@percy/git-utils' && 1 || 0 }} # 0 = full history, needed by the git-utils tests; "cond && 0 || 1" always yields 1 because 0 is falsy
       - uses: actions/setup-node@v3
         with:
           node-version: 14
diff --git a/packages/git-utils/README.md b/packages/git-utils/README.md
new file mode 100644
index 000000000..053d7f3a8
--- /dev/null
+++ b/packages/git-utils/README.md
@@ -0,0 +1,231 @@
+# @percy/git-utils
+
+Utility helpers for interacting with git (used internally by Percy CLI packages).
+
+This package provides higher-level helpers around common git operations with smart error handling, retry logic, and diagnostic capabilities.
+ +## Installation + +```bash +npm install @percy/git-utils +# or +yarn add @percy/git-utils +``` + +## Usage + +You can use the package in two ways: + +### Individual Function Imports + +```js +import { isGitRepository, getCurrentCommit } from '@percy/git-utils'; + +const isRepo = await isGitRepository(); +const commit = await getCurrentCommit(); +``` + +### PercyGitUtils Object + +```js +import { PercyGitUtils } from '@percy/git-utils'; + +const isRepo = await PercyGitUtils.isGitRepository(); +const commit = await PercyGitUtils.getCurrentCommit(); +``` + +## API Reference + +### Repository Validation + +#### `isGitRepository()` + +Check if the current directory is a git repository. + +```js +import { isGitRepository } from '@percy/git-utils'; + +const isRepo = await isGitRepository(); +// Returns: true or false +``` + +#### `getRepositoryRoot()` + +Get the root directory of the git repository. + +```js +import { getRepositoryRoot } from '@percy/git-utils'; + +const root = await getRepositoryRoot(); +// Returns: '/path/to/repo' +// Throws: Error if not a git repository +``` + +### Commit & Branch Information + +#### `getCurrentCommit()` + +Get the SHA of the current HEAD commit. + +```js +import { getCurrentCommit } from '@percy/git-utils'; + +const sha = await getCurrentCommit(); +// Returns: 'abc123...' (40-character SHA) +``` + +#### `getCurrentBranch()` + +Get the name of the current branch. + +```js +import { getCurrentBranch } from '@percy/git-utils'; + +const branch = await getCurrentBranch(); +// Returns: 'main' or 'HEAD' (if detached) +``` + +#### `commitExists(commit)` + +Check if a commit exists in the repository. + +```js +import { commitExists } from '@percy/git-utils'; + +const exists = await commitExists('abc123'); +// Returns: true or false +``` + +### Repository State & Diagnostics + +#### `getGitState()` + +Get comprehensive diagnostic information about the repository state. 
+ +```js +import { getGitState } from '@percy/git-utils'; + +const state = await getGitState(); +// Returns: { +// isValid: true, +// isShallow: false, +// isDetached: false, +// isFirstCommit: false, +// hasRemote: true, +// remoteName: 'origin', +// defaultBranch: 'main', +// issues: [] // Array of diagnostic messages +// } +``` + +**State Properties:** +- `isValid`: Whether the directory is a valid git repository +- `isShallow`: Whether the repository is a shallow clone +- `isDetached`: Whether HEAD is in detached state +- `isFirstCommit`: Whether the current commit is the first commit +- `hasRemote`: Whether a remote is configured +- `remoteName`: Name of the first remote (usually 'origin') +- `defaultBranch`: Detected default branch name +- `issues`: Array of diagnostic warning messages + +### Merge Base & Changed Files + +#### `getMergeBase(targetBranch?)` + +Get the merge-base commit between HEAD and a target branch with smart fallback logic. + +```js +import { getMergeBase } from '@percy/git-utils'; + +const result = await getMergeBase('main'); +// Returns: { +// success: true, +// commit: 'abc123...', +// branch: 'main', +// error: null +// } + +// Or on failure: +// { +// success: false, +// commit: null, +// branch: 'main', +// error: { code: 'SHALLOW_CLONE', message: '...' } +// } +``` + +**Error Codes:** +- `NOT_GIT_REPO`: Not a git repository +- `SHALLOW_CLONE`: Repository is shallow +- `NO_MERGE_BASE`: No common ancestor found +- `UNKNOWN_ERROR`: Other error + +The function automatically: +- Detects the default branch if `targetBranch` is not provided +- Tries remote refs before local branches +- Handles detached HEAD state +- Provides helpful error messages + +#### `getChangedFiles(baselineCommit)` + +Get all changed files between a baseline commit and HEAD. + +```js +import { getChangedFiles } from '@percy/git-utils'; + +const files = await getChangedFiles('origin/main'); +// Returns: ['src/file.js', 'package.json', ...] 
+```
+
+**Features:**
+- Handles file renames (includes both old and new paths)
+- Handles file copies (includes both source and destination)
+- Detects submodule changes
+- Returns paths relative to repository root
+
+### File Operations
+
+#### `getFileContentFromCommit(commit, filePath, options?)`
+
+Read the contents of a file as it existed at a specific commit.
+
+```js
+import { getFileContentFromCommit } from '@percy/git-utils';
+
+const text = await getFileContentFromCommit('abc123', 'src/file.js');
+// Returns: file contents as a string (utf8 by default)
+
+const image = await getFileContentFromCommit('abc123', 'logo.png', { encoding: null });
+// Returns: a Buffer (pass `encoding: null` for binary files)
+// Throws: Error if the path is absolute, contains '..', or cannot be read at that commit
+```
+
+## Advanced Features
+
+### Retry Logic
+
+All git commands include automatic retry logic for concurrent operations:
+- Detects `index.lock` and similar errors
+- Exponential backoff (100ms, 200ms, 400ms)
+- Configurable via `retries` and `retryDelay` options
+
+### Error Handling
+
+Functions provide detailed error messages with context:
+- Diagnostic information about repository state
+- Suggestions for fixing common issues
+- Specific error codes for programmatic handling
+
+## Development
+
+This repository uses Lerna and package-local scripts.
/**
 * Catalogue of the git command lines used by this package.
 *
 * Each export is either an argv array (first element is the executable,
 * 'git') or a function that returns a fresh argv array built from its
 * arguments.
 */

// Prepends the git executable to a list of arguments
const git = (...args) => ['git', ...args];

// Two-dot revision range running from a baseline to a head commit
const revRange = (from, to) => `${from}..${to}`;

// Basic git queries
export const GIT_REV_PARSE_GIT_DIR = git('rev-parse', '--git-dir');
export const GIT_REV_PARSE_SHOW_TOPLEVEL = git('rev-parse', '--show-toplevel');
export const GIT_REV_PARSE_HEAD = git('rev-parse', 'HEAD');
export const GIT_REV_PARSE_ABBREV_REF_HEAD = git('rev-parse', '--abbrev-ref', 'HEAD');
export const GIT_REV_PARSE_IS_SHALLOW = git('rev-parse', '--is-shallow-repository');

// Remote operations
export const GIT_REMOTE_V = git('remote', '-v');
export const GIT_REMOTE_SET_HEAD = (remote, ...args) => git('remote', 'set-head', remote, ...args);

// History and commits
export const GIT_REV_LIST_PARENTS_HEAD = git('rev-list', '--parents', 'HEAD');

// Branch operations
export const GIT_REV_PARSE_VERIFY = (ref) => git('rev-parse', '--verify', ref);
export const GIT_SYMBOLIC_REF = (ref) => git('symbolic-ref', ref);

// Config operations
export const GIT_CONFIG = (...args) => git('config', ...args);
export const GIT_CONFIG_FILE_GET_REGEXP = (file, pattern) =>
  git('config', '--file', file, '--get-regexp', pattern);

// Merge base
export const GIT_MERGE_BASE = (ref1, ref2) => git('merge-base', ref1, ref2);
export const GIT_FETCH = (remote, refspec, ...args) => git('fetch', remote, refspec, ...args);

// Diff operations (the head commit defaults to HEAD)
export const GIT_DIFF_NAME_STATUS = (baselineCommit, headCommit = 'HEAD') =>
  git('diff', '--name-status', revRange(baselineCommit, headCommit));
export const GIT_DIFF_SUBMODULE = (baselineCommit, headCommit = 'HEAD') =>
  git('diff', revRange(baselineCommit, headCommit), '--submodule=short');
export const GIT_DIFF_NAME_ONLY_SUBMODULE = (baselineCommit, headCommit = 'HEAD') =>
  git('diff', '--name-only', revRange(baselineCommit, headCommit));

// Submodule operations: the diff runs inside the submodule directory (-C)
export const GIT_SUBMODULE_DIFF = (submodulePath, baselineCommit, headCommit = 'HEAD') =>
  git('-C', submodulePath, 'diff', '--name-only', revRange(baselineCommit, headCommit));

// File operations
export const GIT_SHOW = (ref, filePath) => git('show', `${ref}:${filePath}`);
export const GIT_CAT_FILE_E = (ref) => git('cat-file', '-e', ref);

// Lower-case substrings of git error output that mark a failure as retryable
export const CONCURRENT_ERROR_PATTERNS = [
  'index.lock',
  'unable to create',
  'file exists',
  'another git process'
];
/**
 * Execute a git command, retrying when it fails because of a concurrent git
 * operation.
 *
 * A failure is retried only when its message contains one of
 * GitCommands.CONCURRENT_ERROR_PATTERNS; every other failure is rethrown
 * immediately. The wait before retry k (0-based) is retryDelay * 2^k ms.
 *
 * @param {string[]|string} command - argv array, or a whitespace-separated command string
 * @param {Object} [options] - Retry settings; all remaining keys are forwarded to execGitOnce
 * @param {number} [options.retries=3] - Maximum number of retries after the first attempt
 * @param {number} [options.retryDelay=100] - Delay before the first retry in ms
 * @returns {Promise<string|Buffer>} - Command output (see execGitOnce)
 * @throws {Error} - The error of the last attempt
 */
async function execGit(command, options = {}) {
  const { retries = 3, retryDelay = 100, ...spawnOptions } = options;

  for (let attempt = 0; ; attempt++) {
    try {
      return await execGitOnce(command, spawnOptions);
    } catch (err) {
      // Only lock/contention failures are worth retrying
      const message = String(err.message).toLowerCase();
      const isConcurrentError = GitCommands.CONCURRENT_ERROR_PATTERNS.some(
        pattern => message.includes(pattern)
      );

      if (!isConcurrentError || attempt >= retries) throw err;

      // Exponential backoff: retryDelay, 2x, 4x, ...
      await new Promise(resolve => setTimeout(resolve, retryDelay * 2 ** attempt));
    }
  }
}

/**
 * Execute a git command once (no retries).
 *
 * Output is collected as raw chunks and decoded once the process has closed,
 * so a multi-byte character that happens to be split across two pipe chunks
 * is decoded correctly.
 *
 * @param {string[]|string} command - argv array, or a whitespace-separated command string
 * @param {Object} [options] - Spawn options, plus:
 * @param {string|null} [options.encoding='utf8'] - Encoding used to decode stdout;
 *   null or 'buffer' resolves with the raw, untrimmed Buffer instead
 * @returns {Promise<string|Buffer>} - Trimmed stdout string, or a Buffer in binary mode
 * @throws {Error} - If the process cannot be started or exits with a non-zero code
 */
async function execGitOnce(command, options = {}) {
  const [cmd, ...args] = Array.isArray(command)
    ? command
    : command.trim().split(/\s+/);

  // `encoding` only controls how we decode the output; it is not a spawn option
  const { encoding = 'utf8', ...spawnOptions } = options;
  const isBinaryMode = encoding === null || encoding === 'buffer';

  return new Promise((resolve, reject) => {
    const child = spawn(cmd, args, spawnOptions);
    const stdoutChunks = [];
    const stderrChunks = [];

    if (child.stdout) {
      child.stdout.on('data', chunk => stdoutChunks.push(chunk));
    }

    if (child.stderr) {
      child.stderr.on('data', chunk => stderrChunks.push(chunk));
    }

    child.on('error', (err) => {
      reject(new Error(`Failed to execute git command: ${err.message}`));
    });

    child.on('close', (code) => {
      try {
        const stdout = Buffer.concat(stdoutChunks);

        if (code !== 0) {
          const stderr = Buffer.concat(stderrChunks).toString();
          reject(new Error(`Git command failed (exit ${code}): ${stderr || stdout.toString()}`));
        } else if (isBinaryMode) {
          resolve(stdout);
        } else {
          resolve(stdout.toString(encoding).trim());
        }
      } catch (err) {
        // e.g. an unknown encoding name: reject instead of throwing inside the event handler
        reject(err);
      }
    });
  });
}

/**
 * Check if the current directory is inside a git repository
 * Executes: git rev-parse --git-dir
 * @returns {Promise<boolean>} - False when the command fails for any reason
 */
export async function isGitRepository() {
  try {
    await execGit(GitCommands.GIT_REV_PARSE_GIT_DIR);
    return true;
  } catch (err) {
    return false;
  }
}

/**
 * Get the root directory of the git repository
 * Executes: git rev-parse --show-toplevel
 * @returns {Promise<string>} - Path printed by git
 * @throws {Error} - 'Not a git repository' when the command fails
 */
export async function getRepositoryRoot() {
  try {
    return await execGit(GitCommands.GIT_REV_PARSE_SHOW_TOPLEVEL);
  } catch (err) {
    throw new Error('Not a git repository');
  }
}

/**
 * Get the current commit SHA
 * Executes: git rev-parse HEAD
 * @returns {Promise<string>} - SHA of HEAD
 * @throws {Error} - When the command fails
 */
export async function getCurrentCommit() {
  try {
    return await execGit(GitCommands.GIT_REV_PARSE_HEAD);
  } catch (err) {
    throw new Error(`Failed to get current commit: ${err.message}`);
  }
}

/**
 * Get current git branch name
 * Executes: git rev-parse --abbrev-ref HEAD
 * @returns {Promise<string>} - Current branch name ('HEAD' when detached)
 * @throws {Error} - When the command fails
 */
export async function getCurrentBranch() {
  try {
    return await execGit(GitCommands.GIT_REV_PARSE_ABBREV_REF_HEAD);
  } catch (err) {
    throw new Error(`Failed to get current branch: ${err.message}`);
  }
}
// Branch names probed, in order, when git cannot report the default branch
const COMMON_DEFAULT_BRANCHES = ['main', 'master', 'develop', 'development'];

/**
 * Validate git repository state and return diagnostic info
 * Checks: repository validity, remote config, shallow clone, detached HEAD,
 * first commit, default branch. Individual git failures are recorded in the
 * result rather than thrown.
 *
 * @returns {Promise<Object>} - {
 *   isValid, isShallow, isDetached, isFirstCommit,
 *   hasRemote, remoteName, defaultBranch, issues
 * } where `issues` is an array of human-readable warnings. When the directory
 * is not a git repository only `isValid: false` and `issues` are meaningful.
 */
export async function getGitState() {
  const state = {
    isValid: false,
    isShallow: false,
    isDetached: false,
    isFirstCommit: false,
    hasRemote: false,
    remoteName: null,
    defaultBranch: null,
    issues: []
  };

  // Verify this is a valid git repository
  // Executes: git rev-parse --git-dir
  let gitDir;
  try {
    gitDir = await execGit(GitCommands.GIT_REV_PARSE_GIT_DIR);
    state.isValid = true;
  } catch {
    state.issues.push('Not a git repository');
    return state;
  }

  // Check for remote configuration; the first listed remote is reported
  // Executes: git remote -v
  try {
    const remotes = await execGit(GitCommands.GIT_REMOTE_V);
    if (remotes && remotes.trim().length > 0) {
      state.hasRemote = true;
      const match = remotes.match(/^(\S+)\s+/);
      if (match) state.remoteName = match[1];
    } else {
      state.issues.push("No git remote configured - run 'git remote add origin <url>'");
    }
  } catch {
    state.issues.push('Failed to check git remote configuration');
  }

  // Warn about shallow clones as they affect history operations
  state.isShallow = await detectShallowClone(gitDir);
  if (state.isShallow) {
    state.issues.push("Shallow clone detected - use 'git fetch --unshallow' or set fetch-depth: 0 in CI");
  }

  // Check if HEAD is detached (not on a branch)
  try {
    state.isDetached = (await getCurrentBranch()) === 'HEAD';
    if (state.isDetached) {
      state.issues.push('Detached HEAD state - may need to fetch remote branches');
    }
  } catch {
    state.isDetached = false;
  }

  // Check if this is the first commit (no parent commits)
  // Executes: git rev-parse --verify HEAD~1
  try {
    await execGit(GitCommands.GIT_REV_PARSE_VERIFY('HEAD~1'));
    state.isFirstCommit = false;
  } catch {
    // A missing parent only proves a root commit when history is complete;
    // in a shallow clone the parent may simply not have been fetched
    state.isFirstCommit = !state.isShallow;
  }

  state.defaultBranch = await findDefaultBranch(state.hasRemote, state.remoteName);

  return state;
}

/**
 * Helper: determine whether the repository is a shallow clone
 * Executes: git rev-parse --is-shallow-repository
 * Falls back to looking for the `shallow` marker file inside the git directory
 * when the command fails or does not answer 'true'/'false' (rev-parse echoes
 * flags it does not recognise instead of failing).
 * @param {string} gitDir - Output of `git rev-parse --git-dir` (may be relative to cwd)
 * @returns {Promise<boolean>}
 */
async function detectShallowClone(gitDir) {
  try {
    const answer = await execGit(GitCommands.GIT_REV_PARSE_IS_SHALLOW);
    if (answer === 'true' || answer === 'false') return answer === 'true';
  } catch {
    // Fall through to the marker-file check
  }

  try {
    await fsPromises.access(path.resolve(gitDir, 'shallow'), fs.constants.F_OK);
    return true;
  } catch {
    return false;
  }
}

/**
 * Helper: read the branch that refs/remotes/<remote>/HEAD points at
 * Executes: git symbolic-ref refs/remotes/<remote>/HEAD
 * @param {string} remote - Remote name
 * @returns {Promise<string|null>} - Branch name, or null if the output has an unexpected shape
 * @throws {Error} - When the symbolic ref is not set
 */
async function readRemoteHeadBranch(remote) {
  const output = await execGit(GitCommands.GIT_SYMBOLIC_REF(`refs/remotes/${remote}/HEAD`));
  const match = output.match(/refs\/remotes\/[^/]+\/(.+)/);
  return match ? match[1] : null;
}

/**
 * Helper: return the first common default branch name whose ref exists
 * Executes: git rev-parse --verify <ref> for each candidate
 * @param {Function} toRef - Maps a branch name to the ref to verify
 * @returns {Promise<string|null>} - Branch name, or null if none exists
 */
async function findFirstExistingBranch(toRef) {
  for (const branch of COMMON_DEFAULT_BRANCHES) {
    try {
      await execGit(GitCommands.GIT_REV_PARSE_VERIFY(toRef(branch)));
      return branch;
    } catch {
      // Try next branch
    }
  }
  return null;
}

/**
 * Helper function to find the default branch
 * With a remote: reads the remote HEAD symbolic ref, then asks git to set it
 * (`git remote set-head <remote> --auto`, which contacts the remote and writes
 * the local ref) and reads it again, then probes common branch names.
 * Without a remote: uses `git config init.defaultBranch` if that branch
 * exists, then probes common local branch names.
 * @param {boolean} hasRemote - Whether repository has a remote configured
 * @param {string|null} remoteName - Name of the remote (e.g., 'origin')
 * @returns {Promise<string>} - Default branch name ('main' when nothing else is found)
 */
async function findDefaultBranch(hasRemote, remoteName) {
  if (hasRemote) {
    const remote = remoteName || 'origin';

    try {
      const branch = await readRemoteHeadBranch(remote);
      if (branch) return branch;
    } catch {
      // The remote HEAD might not be set (never fetched, or shallow clone)
    }

    try {
      await execGit(GitCommands.GIT_REMOTE_SET_HEAD(remote, '--auto'));
      const branch = await readRemoteHeadBranch(remote);
      if (branch) return branch;
    } catch {
      // Remote set-head failed, continue to manual detection
    }

    const remoteBranch = await findFirstExistingBranch(branch => `${remote}/${branch}`);
    if (remoteBranch) return remoteBranch;
  } else {
    try {
      // Executes: git config init.defaultBranch
      const configBranch = await execGit(GitCommands.GIT_CONFIG('init.defaultBranch'));
      if (configBranch) {
        // Only trust the configured name if that branch actually exists locally
        await execGit(GitCommands.GIT_REV_PARSE_VERIFY(configBranch));
        return configBranch;
      }
    } catch {
      // init.defaultBranch not set or branch missing, continue
    }

    const localBranch = await findFirstExistingBranch(branch => branch);
    if (localBranch) return localBranch;
  }

  return 'main';
}

/**
 * Get merge-base commit with smart error handling and recovery
 * Finds the common ancestor between HEAD and a target branch
 * Executes: git merge-base HEAD <ref>, trying <remote>/<branch>, <branch>,
 * and then the same two forms of the default branch if it differs.
 *
 * Never throws: failures are reported through `error.code`
 * (NOT_GIT_REPO, SHALLOW_CLONE, NO_MERGE_BASE, UNKNOWN_ERROR).
 *
 * @param {string} targetBranch - Target branch (if null, auto-detects)
 * @returns {Promise<Object>} - { success, commit, branch, error }
 */
export async function getMergeBase(targetBranch = null) {
  const result = { success: false, commit: null, branch: null, error: null };

  try {
    const gitState = await getGitState();

    if (!gitState.isValid) {
      result.error = { code: 'NOT_GIT_REPO', message: 'Not a git repository' };
      return result;
    }

    if (gitState.isShallow) {
      result.error = {
        code: 'SHALLOW_CLONE',
        message: "Repository is a shallow clone. Use 'git fetch --unshallow' or configure CI with fetch-depth: 0"
      };
      return result;
    }

    const branch = targetBranch || gitState.defaultBranch;
    const remoteName = gitState.hasRemote ? (gitState.remoteName || 'origin') : null;
    result.branch = branch;

    // If in detached HEAD state with remote, make sure the remote branch ref exists
    if (gitState.isDetached && remoteName) {
      try {
        await execGit(GitCommands.GIT_REV_PARSE_VERIFY(`${remoteName}/${branch}`));
      } catch {
        try {
          // Executes: git fetch <remote> <branch>:refs/remotes/<remote>/<branch>
          // Deliberately fetched without --depth: this point is only reached in a
          // complete clone, and a depth-limited fetch could truncate its history
          await execGit(GitCommands.GIT_FETCH(remoteName, `${branch}:refs/remotes/${remoteName}/${branch}`));
        } catch {
          // Fetch failed, continue with available refs
        }
      }
    }

    // Build list of references to try: target branch first, then default branch
    const candidates = branch === gitState.defaultBranch
      ? [branch]
      : [branch, gitState.defaultBranch];
    const attempts = [];
    for (const candidate of candidates) {
      if (remoteName) attempts.push(`${remoteName}/${candidate}`);
      attempts.push(candidate);
    }

    // Try each reference until one succeeds
    for (const attempt of attempts) {
      try {
        const commit = await execGit(GitCommands.GIT_MERGE_BASE('HEAD', attempt));
        result.success = true;
        result.commit = commit;
        return result;
      } catch (err) {
        // Continue to next attempt
      }
    }

    // No merge-base found - build helpful error message
    let errorMessage = `Could not find common ancestor with ${branch}.`;

    if (!gitState.hasRemote) {
      errorMessage += ` No git remote configured. Tried local branch '${branch}'.`;
    } else {
      errorMessage += ' This might be an orphan branch.';
      errorMessage += ` Tried: ${attempts.join(', ')}.`;
    }

    result.error = {
      code: 'NO_MERGE_BASE',
      message: errorMessage
    };
  } catch (err) {
    result.error = {
      code: 'UNKNOWN_ERROR',
      message: `Failed to get merge base: ${err.message}`
    };
  }

  return result;
}

/**
 * Helper: list the submodule paths declared in .gitmodules
 * Executes: git config --file .gitmodules --get-regexp ^submodule\..*\.path$
 * @returns {Promise<string[]>} - Declared paths; empty when the file is missing or unreadable
 */
async function listSubmodulePaths() {
  try {
    const output = await execGit(
      GitCommands.GIT_CONFIG_FILE_GET_REGEXP('.gitmodules', '^submodule\\..*\\.path$')
    );

    // Each line is "<key> <value>"; both may contain spaces, so cut after the
    // key's ".path" suffix instead of splitting on spaces
    return output.split('\n')
      .map(line => {
        const marker = line.indexOf('.path ');
        return marker === -1 ? '' : line.slice(marker + '.path '.length);
      })
      .filter(Boolean);
  } catch {
    return [];
  }
}

/**
 * Helper: list files changed inside one submodule between the commits the
 * superproject records for it at the baseline and at HEAD
 * Executes: git rev-parse --verify <baseline>:<path>, git rev-parse --verify HEAD:<path>,
 *           git -C <path> diff --name-only <old>..<new>
 * @param {string} submodulePath - Submodule path as declared in .gitmodules
 * @param {string} baselineCommit - Baseline commit or ref of the superproject
 * @returns {Promise<string[]>} - Paths relative to the submodule; empty on any failure
 */
async function listSubmoduleChanges(submodulePath, baselineCommit) {
  // Validate submodule path to prevent path traversal
  const normalizedSub = path.normalize(submodulePath);
  if (path.isAbsolute(normalizedSub) || normalizedSub.split(path.sep).includes('..')) {
    return [];
  }

  try {
    // The superproject's baseline ref means nothing inside the submodule, so
    // resolve the submodule commits recorded on both sides first
    const from = await execGit(GitCommands.GIT_REV_PARSE_VERIFY(`${baselineCommit}:${submodulePath}`));
    const to = await execGit(GitCommands.GIT_REV_PARSE_VERIFY(`HEAD:${submodulePath}`));
    if (from === to) return [];

    const output = await execGit(
      GitCommands.GIT_SUBMODULE_DIFF(normalizedSub, from, to),
      { retries: 1 }
    );
    return output.split('\n').filter(Boolean);
  } catch {
    // Submodule added/removed in the range, not initialized, or commits not fetched
    return [];
  }
}

/**
 * Get changed files between current commit and baseline
 * Handles renames and copies (both paths are reported) and submodule changes
 * Executes: git diff --name-status <baseline>..HEAD
 *
 * When a submodule declared in .gitmodules is among the changed paths,
 * '.gitmodules' is added to the result together with the files changed inside
 * that submodule (prefixed with the submodule path). Submodule inspection is
 * best-effort and never fails the call.
 *
 * @param {string} baselineCommit - Baseline commit SHA or ref
 * @returns {Promise<string[]>} - Array of changed file paths (relative to repo root)
 * @throws {Error} - When the diff itself fails
 */
export async function getChangedFiles(baselineCommit = 'origin/main') {
  try {
    const output = await execGit(GitCommands.GIT_DIFF_NAME_STATUS(baselineCommit));

    if (!output) {
      return [];
    }

    const files = new Set();

    // Each line is "<status>\t<path>", or "<status>\t<old>\t<new>" for renames and copies
    for (const line of output.split('\n')) {
      if (!line) continue;
      const [, ...paths] = line.split('\t');
      for (const changedPath of paths) {
        if (changedPath) files.add(changedPath);
      }
    }

    // A changed submodule shows up in the diff as a change to its own path
    const changedSubmodules = (await listSubmodulePaths()).filter(sub => files.has(sub));

    for (const submodulePath of changedSubmodules) {
      files.add('.gitmodules');
      for (const file of await listSubmoduleChanges(submodulePath, baselineCommit)) {
        files.add(`${submodulePath}/${file}`);
      }
    }

    return Array.from(files);
  } catch (err) {
    throw new Error(`Failed to get changed files: ${err.message}`);
  }
}
/**
 * Get file content from a specific commit
 * Supports both text and binary files
 * Executes: git show <commit>:<filePath>
 * @param {string} commit - Commit SHA or ref (HEAD, branch name, etc.)
 * @param {string} filePath - File path relative to repo root
 * @param {Object} options - Options
 * @param {string|null} options.encoding - Output encoding ('utf8' or null for Buffer, default: 'utf8')
 * @returns {Promise<string|Buffer>} - File contents (string if utf8, Buffer if null encoding)
 * @throws {Error} - If the commit or path is invalid, or git cannot read the file
 */
export async function getFileContentFromCommit(commit, filePath, options = {}) {
  try {
    // A leading '-' would be parsed by git as an option, not a revision
    if (!commit || typeof commit !== 'string' || commit.startsWith('-')) {
      throw new Error('Invalid commit parameter');
    }
    if (typeof filePath !== 'string') {
      throw new Error(`Invalid file path: ${filePath}`);
    }

    // Sanitize file path to prevent path traversal
    const normalized = path.normalize(filePath);
    if (path.isAbsolute(normalized) || normalized.split(path.sep).includes('..')) {
      throw new Error(`Invalid file path: ${filePath}`);
    }

    // Paths inside a git tree always use '/', but path.normalize() produces
    // the platform separator ('\\' on Windows)
    const gitPath = normalized.split(path.sep).join('/');

    const { encoding = 'utf8' } = options;
    return await execGit(GitCommands.GIT_SHOW(commit, gitPath), { encoding });
  } catch (err) {
    throw new Error(`Failed to get file ${filePath} from commit ${commit}: ${err.message}`);
  }
}

/**
 * Check if a commit exists in the repository
 * Executes: git cat-file -e <commit>^{commit}
 * Only 'HEAD', hexadecimal SHAs (4-40 characters) and 'refs/...' names are
 * accepted; anything else returns false without running git.
 * @param {string} commit - Commit SHA or ref to check
 * @returns {Promise<boolean>} - True if the reference resolves to a commit
 */
export async function commitExists(commit) {
  try {
    if (!commit || typeof commit !== 'string') return false;

    // Validate commit reference format before handing it to git
    const safeRef = commit === 'HEAD' ||
      /^[0-9a-fA-F]{4,40}$/.test(commit) ||
      /^(refs\/[A-Za-z0-9._/-]+)$/.test(commit);
    if (!safeRef) return false;

    // ^{commit} makes git verify the object is (or peels to) a commit; a bare
    // `cat-file -e` also succeeds for trees and blobs
    await execGit(GitCommands.GIT_CAT_FILE_E(`${commit}^{commit}`));
    return true;
  } catch (err) {
    return false;
  }
}
b/packages/git-utils/test-binary.bin new file mode 100644 index 000000000..0dd1608e4 --- /dev/null +++ b/packages/git-utils/test-binary.bin @@ -0,0 +1,2 @@ +‰PNG + diff --git a/packages/git-utils/test/.eslintrc b/packages/git-utils/test/.eslintrc new file mode 100644 index 000000000..e9b386cb0 --- /dev/null +++ b/packages/git-utils/test/.eslintrc @@ -0,0 +1,4 @@ +env: + jasmine: true +rules: + import/no-extraneous-dependencies: off diff --git a/packages/git-utils/test/git.test.js b/packages/git-utils/test/git.test.js new file mode 100644 index 000000000..4a8181f61 --- /dev/null +++ b/packages/git-utils/test/git.test.js @@ -0,0 +1,505 @@ +import { + isGitRepository, + getRepositoryRoot, + getCurrentCommit, + getCurrentBranch, + getGitState, + getMergeBase, + getChangedFiles, + getFileContentFromCommit, + commitExists +} from '../src/git.js'; +import fs from 'fs'; +import path from 'path'; +import os from 'os'; + +describe('@percy/git-utils', () => { + describe('isGitRepository', () => { + it('should return true when in a git repository', async () => { + const result = await isGitRepository(); + expect(result).toBe(true); + }); + + it('should return false when not in a git repository', async () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-test-')); + const originalCwd = process.cwd(); + + try { + process.chdir(tmpDir); + const result = await isGitRepository(); + expect(result).toBe(false); + } finally { + process.chdir(originalCwd); + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + }); + + describe('getRepositoryRoot', () => { + it('should return the repository root path', async () => { + const root = await getRepositoryRoot(); + expect(typeof root).toBe('string'); + expect(root.length).toBeGreaterThan(0); + expect(root).toContain('cli'); + }); + + it('should throw error when not in a git repository', async () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-test-')); + const originalCwd = process.cwd(); + + try { 
+ process.chdir(tmpDir); + await expectAsync(getRepositoryRoot()).toBeRejectedWithError(/Not a git repository/); + } finally { + process.chdir(originalCwd); + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + }); + + describe('getCurrentCommit', () => { + it('should return current commit SHA', async () => { + const commit = await getCurrentCommit(); + expect(typeof commit).toBe('string'); + expect(commit).toMatch(/^[0-9a-f]{40}$/); + }); + + it('should return valid commit that exists', async () => { + const commit = await getCurrentCommit(); + const exists = await commitExists(commit); + expect(exists).toBe(true); + }); + }); + + describe('getCurrentBranch', () => { + it('should return current branch name', async () => { + const branch = await getCurrentBranch(); + expect(typeof branch).toBe('string'); + expect(branch.length).toBeGreaterThan(0); + }); + + it('should return a valid branch name', async () => { + const branch = await getCurrentBranch(); + // Branch should not be empty and should be valid git ref format + expect(branch).not.toBe(''); + expect(branch).toMatch(/^[a-zA-Z0-9/_-]+$/); + }); + }); + + describe('commitExists', () => { + it('should return true for existing commit (HEAD)', async () => { + const currentCommit = await getCurrentCommit(); + const exists = await commitExists(currentCommit); + expect(exists).toBe(true); + }); + + it('should return true for HEAD reference', async () => { + const exists = await commitExists('HEAD'); + expect(exists).toBe(true); + }); + + it('should return false for non-existing commit', async () => { + // Use a SHA with an unusual pattern that won't match any object + const exists = await commitExists('deadbeefdeadbeefdeadbeefdeadbeefdeadbeef'); + expect(exists).toBe(false); + }); + + it('should return false for invalid commit format', async () => { + const exists = await commitExists('invalid-commit-sha'); + expect(exists).toBe(false); + }); + }); + + describe('getGitState', () => { + it('should return 
comprehensive git state object', async () => {
+      const state = await getGitState();
+
+      // Verify structure
+      expect(state).toEqual(jasmine.objectContaining({
+        isValid: jasmine.any(Boolean),
+        isShallow: jasmine.any(Boolean),
+        isDetached: jasmine.any(Boolean),
+        isFirstCommit: jasmine.any(Boolean),
+        hasRemote: jasmine.any(Boolean),
+        defaultBranch: jasmine.any(String),
+        issues: jasmine.any(Array)
+      }));
+    });
+
+    it('should detect valid git repository', async () => { // runs against whatever repository contains the current working directory
+      const state = await getGitState();
+      expect(state.isValid).toBe(true);
+    });
+
+    it('should have a default branch set', async () => {
+      const state = await getGitState();
+      expect(state.defaultBranch).toBeTruthy();
+      expect(['main', 'master', 'develop', 'development']).toContain(state.defaultBranch); // any other default branch name fails this test
+    });
+
+    it('should detect remote configuration correctly', async () => {
+      const state = await getGitState();
+      expect(state.hasRemote).toBe(true);
+      if (state.hasRemote) { // NOTE(review): hasRemote is asserted true on the previous line, so this guard looks redundant
+        expect(state.remoteName).toBeTruthy();
+        expect(typeof state.remoteName).toBe('string');
+      }
+    });
+
+    it('should not be shallow repository in normal clone', async () => { // NOTE(review): despite the title, only the type of isShallow is asserted
+      const state = await getGitState();
+      // In a normal development environment, this should not be shallow
+      // CI environments might be shallow, so we just verify the type
+      expect(typeof state.isShallow).toBe('boolean');
+    });
+
+    it('should detect non-git repository', async () => {
+      const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-test-')); // fresh empty directory; the test assumes it is outside any git repository
+      const originalCwd = process.cwd(); // saved so the finally block can restore it
+
+      try {
+        process.chdir(tmpDir); // getGitState() takes no arguments here, so the test switches the process cwd
+        const state = await getGitState();
+        expect(state.isValid).toBe(false);
+        expect(state.issues).toContain('Not a git repository');
+      } finally { // always restore cwd and delete the temp dir, even when an expectation throws
+        process.chdir(originalCwd);
+        fs.rmSync(tmpDir, { recursive: true, force: true });
+      }
+    });
+
+    it('should include issues array with helpful messages', async () => {
+      const state = await getGitState();
+      expect(Array.isArray(state.issues)).toBe(true);
+      // In a valid repo with remote, issues should be empty or informational
+      if (!state.isValid || !state.hasRemote || state.isShallow || state.isDetached) { // any one of these conditions must be reported through at least one issue
+        expect(state.issues.length).toBeGreaterThan(0);
+      }
+    });
+  });
+
+  describe('getMergeBase', () => { // getMergeBase([branch]) resolves to a result object with success, commit, branch and error fields
+    it('should return result object with correct structure', async () => {
+      const result = await getMergeBase();
+
+      expect(result).toEqual(jasmine.objectContaining({
+        success: jasmine.any(Boolean),
+        commit: jasmine.any(String),
+        branch: jasmine.any(String),
+        error: null // NOTE(review): pins error to null, so this shape check only passes when no error is reported
+      }));
+    });
+
+    it('should successfully get merge-base with default branch', async () => {
+      const result = await getMergeBase();
+
+      expect(result.success).toBe(true);
+      expect(result.commit).toMatch(/^[0-9a-f]{40}$/); // exactly 40 lowercase hex characters
+      expect(result.error).toBe(null);
+    });
+
+    it('should return valid commit SHA that exists', async () => {
+      const result = await getMergeBase();
+
+      if (result.success) { // no assertion runs when the merge-base lookup fails
+        const exists = await commitExists(result.commit);
+        expect(exists).toBe(true);
+      }
+    });
+
+    it('should accept specific target branch', async () => {
+      const currentBranch = await getCurrentBranch();
+      const result = await getMergeBase(currentBranch);
+
+      // Should succeed or provide error
+      expect(result).toBeDefined();
+      expect(typeof result.success).toBe('boolean');
+      expect(result.branch).toBe(currentBranch); // the requested branch is echoed back in the result
+    });
+
+    it('should handle non-existent branch gracefully with fallback', async () => { // either outcome is accepted; the call must resolve rather than reject
+      const result = await getMergeBase('this-branch-definitely-does-not-exist-xyz-12345-nonexistent');
+
+      expect(typeof result.success).toBe('boolean');
+      expect(result.branch).toBe('this-branch-definitely-does-not-exist-xyz-12345-nonexistent');
+
+      if (!result.success) { // failure path: structured error with a fixed code and message fragment
+        expect(result.error).toBeTruthy();
+        expect(result.error.code).toBe('NO_MERGE_BASE');
+        expect(result.error.message).toContain('Could not find common ancestor');
+      } else { // success path: a full 40-character SHA is still required
+        expect(result.commit).toMatch(/^[0-9a-f]{40}$/);
+      }
+    });
+
+    it('should provide helpful error messages', async () => {
+      const result = await getMergeBase('nonexistent-branch');
+
+      if (!result.success) { // no assertion runs when the lookup unexpectedly succeeds
+        expect(result.error).toBeTruthy();
+        expect(result.error.code).toBeTruthy();
+        expect(result.error.message).toBeTruthy();
+        expect(typeof result.error.message).toBe('string');
+      }
+    });
+  });
+
+  describe('getChangedFiles', () => { // getChangedFiles(ref) resolves to an array of file path strings
+    it('should return an array', async () => {
+      const files = await getChangedFiles('HEAD');
+      expect(Array.isArray(files)).toBe(true);
+    });
+
+    it('should return empty array when comparing HEAD to itself', async () => {
+      const files = await getChangedFiles('HEAD');
+      expect(files).toEqual([]);
+    });
+
+    it('should detect changes between commits', async () => {
+      try { // HEAD~1 may not resolve (presumably shallow or single-commit history; confirm), so the expected failure message also passes
+        const files = await getChangedFiles('HEAD~1');
+        expect(Array.isArray(files)).toBe(true);
+        expect(files.length).toBeGreaterThanOrEqual(0); // NOTE(review): always true for an array length
+      } catch (err) {
+        expect(err.message).toContain('Failed to get changed files');
+      }
+    });
+
+    it('should return file paths as strings', async () => {
+      try { // NOTE(review): if the call rejects, only the error message is checked and no per-file assertion runs
+        const files = await getChangedFiles('HEAD~10');
+        files.forEach(file => {
+          expect(typeof file).toBe('string');
+          expect(file.length).toBeGreaterThan(0);
+        });
+      } catch (err) {
+        expect(err.message).toContain('Failed to get changed files');
+      }
+    });
+
+    it('should handle baseline commit reference', async () => {
+      const state = await getGitState();
+      const remote = state.remoteName || 'origin'; // falls back to 'origin' when no remote name is reported
+      const branch = state.defaultBranch || 'main'; // falls back to 'main' when no default branch is reported
+
+      try {
+        const files = await getChangedFiles(`${remote}/${branch}`); // remote-tracking ref such as origin/main
+        expect(Array.isArray(files)).toBe(true);
+      } catch (err) { // any rejection is tolerated as long as it carries a message
+        expect(err.message).toBeTruthy();
+      }
+    });
+  });
+
+  describe('getFileContentFromCommit', () => { // getFileContentFromCommit(commit, filePath[, { encoding }]); paths here appear to be relative to the repository root
+    it('should get text file content from current commit', async () => {
+      const currentCommit = await getCurrentCommit();
+      const content = await getFileContentFromCommit(currentCommit, 'README.md');
+
+      expect(typeof content).toBe('string');
+      expect(content.length).toBeGreaterThan(0);
+    });
+
+    it('should get file content from HEAD reference', async () => { // a symbolic ref is accepted in place of a SHA
+      const content = await getFileContentFromCommit('HEAD', 'README.md');
+
+      expect(typeof content).toBe('string');
+      expect(content.length).toBeGreaterThan(0);
+    });
+
+    it('should get file content with relative path', async () => {
+      const currentCommit = await getCurrentCommit();
+      const content = await getFileContentFromCommit(currentCommit, 'packages/git-utils/package.json');
+
+      expect(typeof content).toBe('string');
+      expect(content).toContain('"name"');
+    });
+
+    it('should get binary file content without corruption', async () => { // relies on packages/git-utils/test-binary.bin being committed and unmodified in the working tree
+      const currentCommit = await getCurrentCommit();
+
+      // Get binary content with encoding: null
+      const binaryContent = await getFileContentFromCommit(
+        currentCommit,
+        'packages/git-utils/test-binary.bin',
+        { encoding: null }
+      );
+
+      expect(Buffer.isBuffer(binaryContent)).toBe(true);
+
+      // Verify PNG header bytes
+      expect(binaryContent[0]).toBe(0x89);
+      expect(binaryContent[1]).toBe(0x50); // 'P'
+      expect(binaryContent[2]).toBe(0x4e); // 'N'
+      expect(binaryContent[3]).toBe(0x47); // 'G'
+
+      // Compare with actual file
+      const repoRoot = await getRepositoryRoot();
+      const actualContent = fs.readFileSync(path.join(repoRoot, 'packages/git-utils/test-binary.bin')); // no encoding given, so this is a Buffer
+      expect(Buffer.compare(binaryContent, actualContent)).toBe(0); // 0 means the two buffers are byte-for-byte equal
+    });
+
+    it('should throw error for non-existent file', async () => {
+      const currentCommit = await getCurrentCommit();
+
+      await expectAsync(
+        getFileContentFromCommit(currentCommit, 'this-file-does-not-exist.txt')
+      ).toBeRejectedWithError(/Failed to get file/);
+    });
+
+    it('should throw error for invalid commit', async () => { // same error message as the missing-file case
+      await expectAsync(
+        getFileContentFromCommit('invalid-commit-sha-12345', 'README.md')
+      ).toBeRejectedWithError(/Failed to get file/);
+    });
+
+    it('should reject absolute paths', async () => {
+      const currentCommit = await getCurrentCommit();
+
+      await expectAsync(
+        getFileContentFromCommit(currentCommit, '/etc/passwd')
+      ).toBeRejectedWithError(/Invalid file path/);
+    });
+
+    it('should reject path traversal attempts', async () => { // covers both a leading '..' and a '..' sequence that climbs out after a normal segment
+      const currentCommit = await getCurrentCommit();
+
+      await expectAsync(
+        getFileContentFromCommit(currentCommit, '../../../etc/passwd')
+      ).toBeRejectedWithError(/Invalid file path/);
+
+      await expectAsync(
+        getFileContentFromCommit(currentCommit, 'foo/../../secrets.txt')
+      ).toBeRejectedWithError(/Invalid file path/);
+    });
+
+    it('should throw error for invalid commit parameter', async () => { // empty string and null must both be rejected
+      await expectAsync(
+        getFileContentFromCommit('', 'README.md')
+      ).toBeRejectedWithError(/Invalid commit parameter/);
+
+      await expectAsync(
+        getFileContentFromCommit(null, 'README.md')
+      ).toBeRejectedWithError(/Invalid commit parameter/);
+    });
+
+    it('should return string by default for text files', async () => {
+      const currentCommit = await getCurrentCommit();
+
+      // Default behavior without encoding option
+      const content = await getFileContentFromCommit(currentCommit, 'package.json'); // the root package.json, expected to contain "private" and "workspaces"
+
+      expect(typeof content).toBe('string');
+      expect(content).toContain('"private"');
+      expect(content).toContain('"workspaces"');
+    });
+
+    it('should return Buffer when encoding is null', async () => {
+      const currentCommit = await getCurrentCommit();
+
+      const content = await getFileContentFromCommit(
+        currentCommit,
+        'package.json',
+        { encoding: null }
+      );
+
+      expect(Buffer.isBuffer(content)).toBe(true);
+
+      // Can convert back to string
+      const str = content.toString('utf8');
+      expect(str).toContain('"private"');
+      expect(str).toContain('"workspaces"');
+    });
+  });
+
+  describe('Edge cases and error handling', () => {
+    it('should handle concurrent operations with retry', async () => { // NOTE(review): nothing here observes a retry; only the four results are checked
+      // Run multiple operations concurrently
+      const promises = [ // all four calls are started before any of them is awaited
+        isGitRepository(),
+        getCurrentCommit(),
+        getCurrentBranch(),
+        getGitState()
+      ];
+
+      const results = await Promise.all(promises); // results keep the order of the promises array
+
+      expect(results[0]).toBe(true);
+      expect(typeof results[1]).toBe('string');
+      expect(typeof results[2]).toBe('string');
+      expect(results[3].isValid).toBe(true);
+    });
+
+    it('should handle operations in non-git directory gracefully', async () => {
+      const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-test-')); // fresh empty directory; the test assumes it is outside any git repository
+      const originalCwd = process.cwd(); // saved so the finally block can restore it
+
+      try {
+        process.chdir(tmpDir);
+
+        const isRepo = await isGitRepository(); // resolves false instead of rejecting
+        expect(isRepo).toBe(false);
+
+        await expectAsync(getRepositoryRoot()).toBeRejected();
+        await expectAsync(getCurrentCommit()).toBeRejected();
+        await expectAsync(getCurrentBranch()).toBeRejected();
+
+        const state = await getGitState(); // resolves with isValid false instead of rejecting
+        expect(state.isValid).toBe(false);
+      } finally { // always restore cwd and delete the temp dir
+        process.chdir(originalCwd);
+        fs.rmSync(tmpDir, { recursive: true, force: true });
+      }
+    });
+
+    it('should handle multiple sequential operations', async () => { // each call is awaited before the next one starts
+      const isRepo = await isGitRepository();
+      const root = await getRepositoryRoot();
+      const commit = await getCurrentCommit();
+      const branch = await getCurrentBranch();
+      const state = await getGitState();
+
+      expect(isRepo).toBe(true);
+      expect(root).toBeTruthy();
+      expect(commit).toMatch(/^[0-9a-f]{40}$/);
+      expect(branch).toBeTruthy();
+      expect(state.isValid).toBe(true);
+    });
+  });
+
+  describe('security checks', () => {
+    it('commitExists should reject clearly invalid refs', async () => {
+      const invalid = 'some$weird;ref`rm -rf /`'; // contains shell metacharacters: $, ; and backticks
+      const exists = await commitExists(invalid);
+      expect(exists).toBe(false); // must resolve false, not reject
+    });
+  });
+
+  describe('Integration scenarios', () => {
+    it('should provide complete workflow: check repo, get state, find merge-base', async () => {
+      const isRepo = await isGitRepository();
+      expect(isRepo).toBe(true);
+
+      const state = await getGitState();
+      expect(state.isValid).toBe(true);
+
+      const mergeBase = await getMergeBase(state.defaultBranch); // feeds the detected default branch back in as the target
+      expect(mergeBase.success).toBe(true);
+      expect(mergeBase.commit).toBeTruthy();
+    });
+
+    it('should support typical CI workflow', async () => {
+      const state = await getGitState();
+
+      if (state.isValid && !state.isShallow && state.hasRemote) { // otherwise the test makes no assertions at all
+        const mergeBase = await getMergeBase();
+        expect(mergeBase.success).toBe(true);
+
+        if (mergeBase.success) { // NOTE(review): success is asserted true on the previous line, so this guard looks redundant
+          const changedFiles = await getChangedFiles(mergeBase.commit);
+          expect(Array.isArray(changedFiles)).toBe(true);
+        }
+      }
+    });
+  });
+});