build-push-action/src/buildkit_validation.ts
Claude afff3d41cb fix: prevent buildkit corruption on sticky disk commit
- Add graceful shutdown validation - fail if buildkitd doesn't shut down cleanly
- Add sync after buildkitd termination to flush database writes
- Add buildkit state validation before committing sticky disk
- Prevent sticky disk commit on build failures
- Add multiple sync operations before unmounting
- Add buildkit validation utilities to check database integrity

This should prevent the BoltDB corruption issues we've been seeing.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-31 17:59:12 -04:00

96 lines
3.3 KiB
TypeScript

import * as core from '@actions/core';
import {exec} from 'child_process';
import {promisify} from 'util';
// Promise-returning wrapper around child_process.exec: callers await the
// command's captured output and catch a rejection when the command fails.
const execAsync = promisify(exec);
// NOTE(review): not referenced by any function in this file - confirm whether
// it is still needed (validateBuildkitDatabases hard-codes a different port).
const BUILDKIT_DAEMON_ADDR = 'tcp://127.0.0.1:1234';
/**
 * Validates the on-disk buildkit state before the sticky disk is committed.
 *
 * Runs the following checks against /var/lib/buildkit and then issues a final
 * `sync`:
 *   1. no buildkitd process is still running,
 *   2. no `*.lock`, `*-wal` or `*-shm` files have been left behind,
 *   3. none of the known database files that can be stat'ed is 0 bytes.
 *
 * Database files that are absent (or cannot be stat'ed) are skipped rather
 * than treated as corrupt. This function does not throw: unexpected failures
 * are logged and reported as `false`.
 *
 * @returns true if buildkit state appears healthy, false otherwise
 */
export async function validateBuildkitState(): Promise<boolean> {
  try {
    // buildkitd is expected to have been shut down already.
    try {
      await execAsync('pgrep buildkitd');
      core.warning('buildkitd process is still running - state may be inconsistent');
      return false;
    } catch (error: unknown) {
      // Exit status 1 means pgrep matched nothing, which is the healthy case.
      // Any other failure means we could not determine the process state.
      if (buildkitExitCode(error) !== 1) {
        core.warning(`Error checking for buildkitd process: ${buildkitErrorMessage(error)}`);
        return false;
      }
    }

    // Leftover lock/temporary files might indicate an unclean shutdown.
    try {
      const {stdout: lockFiles} = await execAsync(
        'sudo find /var/lib/buildkit -name "*.lock" -o -name "*-wal" -o -name "*-shm" 2>/dev/null || true'
      );
      const leftovers = lockFiles.trim();
      if (leftovers) {
        core.warning(`Found lock/temporary files indicating potential unclean shutdown: ${leftovers}`);
        return false;
      }
    } catch (error: unknown) {
      // Best effort: failing to run the scan does not fail validation.
      core.debug(`Error checking for lock files: ${buildkitErrorMessage(error)}`);
    }

    // A 0-byte database file indicates corruption.
    const dbFiles = ['history.db', 'cache.db', 'snapshots.db', 'metadata_v2.db', 'containerdmeta.db'];
    for (const db of dbFiles) {
      try {
        // No `|| echo "0"` fallback here: a missing file must make the command
        // fail (handled in the catch below) instead of being reported as an
        // empty, and therefore "corrupt", database.
        const {stdout} = await execAsync(`sudo stat -c%s /var/lib/buildkit/${db} 2>/dev/null`);
        const size = Number.parseInt(stdout.trim(), 10);
        if (Number.isNaN(size)) {
          core.debug(`Could not check ${db}: unexpected stat output "${stdout.trim()}"`);
          continue;
        }
        if (size === 0) {
          core.warning(`Database file ${db} is 0 bytes - indicates corruption`);
          return false;
        }
      } catch (error: unknown) {
        // File might not exist, which is okay
        core.debug(`Could not check ${db}: ${buildkitErrorMessage(error)}`);
      }
    }

    // Final sync to ensure everything is flushed
    await execAsync('sync');
    return true;
  } catch (error: unknown) {
    core.error(`Error validating buildkit state: ${buildkitErrorMessage(error)}`);
    return false;
  }
}

/** Returns the `code` property of a caught value (the exit status for failed commands), if any. */
function buildkitExitCode(error: unknown): unknown {
  if (typeof error === 'object' && error !== null && 'code' in error) {
    return (error as {code?: unknown}).code;
  }
  return undefined;
}

/** Renders a caught value of unknown type as a log-friendly message. */
function buildkitErrorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}
/**
 * Attempts to start buildkitd temporarily to check database integrity.
 * This is a more thorough check than inspecting files, but takes longer
 * (the daemon is run under `timeout 5s`).
 *
 * The daemon's combined stdout/stderr is scanned for `panic:`,
 * `assertion failed` and `corrupted`. If the probe itself cannot be run, the
 * databases are assumed to be okay. Any buildkitd process is force-killed
 * afterwards on every path.
 *
 * @returns false if the daemon output contains a corruption marker, true otherwise
 */
export async function validateBuildkitDatabases(): Promise<boolean> {
  try {
    // `2>&1 || true`: capture stderr alongside stdout and never reject just
    // because timeout/buildkitd exited non-zero.
    const {stdout: daemonOutput} = await execAsync(
      'timeout 5s sudo buildkitd --debug --addr tcp://127.0.0.1:12345 2>&1 || true'
    );

    // Check if it panicked with database errors
    const corruptionMarkers = ['panic:', 'assertion failed', 'corrupted'];
    if (corruptionMarkers.some(marker => daemonOutput.includes(marker))) {
      core.error('Buildkit database corruption detected during validation');
      return false;
    }
    return true;
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    core.debug(`Database validation check failed: ${message}`);
    // If the check fails, assume databases might be okay
    return true;
  } finally {
    // Kill any remaining buildkitd process, whichever way the probe ended, so
    // a stray daemon is never left holding the state directory.
    // NOTE(review): SIGKILL gives buildkitd no chance to shut down cleanly -
    // confirm this is acceptable for a daemon opened on the real state dir.
    try {
      await execAsync('sudo pkill -9 buildkitd || true');
    } catch (cleanupError: unknown) {
      const message = cleanupError instanceof Error ? cleanupError.message : String(cleanupError);
      core.debug(`Failed to clean up buildkitd after validation: ${message}`);
    }
  }
}