xetup/internal/prereboot/prereboot_windows.go
X9 Dev d30767ef8b
Some checks failed
release / build-and-release (push) Failing after 32s
fix: comprehensive reliability and robustness improvements
Critical fixes:
- Fix resume mode: StepsByIDs returned Enabled=false, all resume steps
  would be SKIPPED (deployment could never resume after reboot)
- Add reboot loop protection: per-step retry counter (max 5) prevents
  infinite reboot cycles when a step always exits with code 9
- Block reboot when state.Save() fails in resumePhase (prevents state
  loss leading to full restart from scratch)
- Atomic state file write (write-to-tmp + rename) prevents JSON
  corruption on BSOD/power loss mid-write
- Script watchdog: kills scripts after 30 min of no output (resets on
  each line, so active long-running scripts are never killed)
- Fix copyFile: check Close() error explicitly instead of deferred
  close that silently drops flush errors (e.g. disk full)

High severity:
- Cleanup() now logs errors instead of silently ignoring them
- Email report: 3 retries with backoff + always saves C:\X9\report.html
- Winget parallel jobs: 10 min timeout, kill hung jobs
- UCPD stop verification: 2s wait + state check before PDF association
- Atera installer: /qn -> /qb so MFA window can appear
- GVLK activation: match by EditionID (registry, not localized) instead
  of fragile OS caption string matching

Medium severity:
- Default profile hive unload: retry loop (5 attempts, increasing delay)
- LayoutModification.xml: UTF-8 without BOM (PS 5.1 Set-Content adds BOM)
- Set-Reg SYSTEM task: try/finally ensures temp file + task cleanup
- Windows Update: @($available).Count for PS 5.1 single-result edge case
- config.json: add missing kmsServer field in activation section

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-28 11:49:43 +02:00

201 lines
6.3 KiB
Go

//go:build windows
// Package prereboot ensures the reboot-resume infrastructure is in place
// before xetup triggers a system restart mid-deployment.
package prereboot
import (
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
"golang.org/x/sys/windows/registry"
)
// Names and locations shared by the reboot-resume helpers in this package.
const (
	// StablePath is where xetup.exe is copied so it survives across reboots.
	StablePath = `C:\Windows\Setup\Scripts\xetup.exe`

	// TaskName is the scheduled task that re-launches xetup after each reboot.
	TaskName = "X9-Resume"

	// adminUser is the local account used for autologon and as the
	// principal of the resume task.
	adminUser = "adminx9"

	// winlogonKey is the registry path (opened under HKEY_LOCAL_MACHINE)
	// that holds the autologon values.
	winlogonKey = `SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon`
)
// IsAdminx9 reports whether the current process runs as the adminx9 account.
// The decision is based on the USERNAME environment variable, compared
// case-insensitively against the account name.
func IsAdminx9() bool {
	current := os.Getenv("USERNAME")
	return strings.EqualFold(current, adminUser)
}
// Prepare puts the reboot-resume infrastructure in place before a restart.
//
// When the process is not running as adminx9 it performs, in order:
//   - creation of the adminx9 local account if it does not exist
//   - a copy of the running binary to StablePath
//   - enabling autologon for adminx9 (AutoLogonCount=20 safety cap)
//   - registration of the X9-Resume scheduled task (AtLogOn adminx9,
//     RunLevel Highest)
//
// The first step that fails aborts the sequence and its error is returned,
// wrapped with the name of the step.
//
// When the process already runs as adminx9 the function returns nil without
// touching anything, on the assumption that an earlier call did the setup.
// It is intended to be safe to call repeatedly.
func Prepare() error {
	if IsAdminx9() {
		return nil
	}

	err := ensureAdminx9User()
	if err != nil {
		return fmt.Errorf("adminx9 account: %w", err)
	}

	err = copySelfTo(StablePath)
	if err != nil {
		return fmt.Errorf("copy binary: %w", err)
	}

	err = setAutologon()
	if err != nil {
		return fmt.Errorf("autologon: %w", err)
	}

	err = registerResumeTask()
	if err != nil {
		return fmt.Errorf("resume task: %w", err)
	}

	return nil
}
// Cleanup turns autologon off and removes the X9-Resume scheduled task.
// It is meant to be called once all deployment steps have completed.
//
// Both actions are always attempted. A failure is only reported as a [WARN]
// line on stderr and never propagated, since the deployment itself is
// already done. (The original note says stderr ends up in Deploy.log via
// the runner; that wiring is not visible in this file.)
func Cleanup() {
	autologonErr := disableAutologon()
	if autologonErr != nil {
		fmt.Fprintf(os.Stderr, "[WARN] Cleanup: disableAutologon failed: %v\n", autologonErr)
	}

	taskErr := unregisterResumeTask()
	if taskErr != nil {
		fmt.Fprintf(os.Stderr, "[WARN] Cleanup: unregisterResumeTask failed: %v\n", taskErr)
	}
}
// ensureAdminx9User creates the adminx9 local account if it is absent.
//
// For a new account it creates the user with an empty password, adds it to
// the local "Administrators" group, and writes a zero DWORD named after the
// account under Winlogon\SpecialAccounts\UserList to hide it from the
// login screen. Any failing step is returned with the step's context.
//
// NOTE(review): when the account already exists the function returns
// immediately, so the group membership and the hide value are not
// re-checked (e.g. after an earlier run that failed part-way) — confirm
// this is intended.
func ensureAdminx9User() error {
	// "net user <name>" exiting with status 0 is taken to mean the account exists.
	lookup := newHiddenCmd("net", "user", adminUser)
	if lookup.Run() == nil {
		return nil
	}

	// Create the account with an empty password.
	out, err := newHiddenCmd("net", "user", adminUser, "", "/add").CombinedOutput()
	if err != nil {
		return fmt.Errorf("net user /add: %s: %w", strings.TrimSpace(string(out)), err)
	}

	// Grant local administrator rights.
	out, err = newHiddenCmd("net", "localgroup", "Administrators", adminUser, "/add").CombinedOutput()
	if err != nil {
		return fmt.Errorf("net localgroup: %s: %w", strings.TrimSpace(string(out)), err)
	}

	// Hide the account from the Windows login screen.
	const userListKey = `SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon\SpecialAccounts\UserList`
	key, _, err := registry.CreateKey(registry.LOCAL_MACHINE, userListKey, registry.SET_VALUE)
	if err != nil {
		return fmt.Errorf("registry hide: %w", err)
	}
	defer key.Close()

	return key.SetDWordValue(adminUser, 0)
}
// copySelfTo copies the running executable to dst, creating dst's parent
// directory when needed. Nothing is copied when the executable's path
// already equals dst (compared case-insensitively).
func copySelfTo(dst string) error {
	exePath, err := os.Executable()
	if err != nil {
		return err
	}

	// Prefer the symlink-free path; keep the raw one if resolution fails.
	resolvedPath, evalErr := filepath.EvalSymlinks(exePath)
	if evalErr == nil {
		exePath = resolvedPath
	}

	// Already running from the destination: nothing to do.
	if strings.EqualFold(exePath, dst) {
		return nil
	}

	parent := filepath.Dir(dst)
	if mkErr := os.MkdirAll(parent, 0755); mkErr != nil {
		return mkErr
	}
	return copyFile(exePath, dst)
}
// copyFile copies the file at src to dst, replacing dst only after the
// complete contents have been written.
//
// The data is streamed into a temporary file created next to dst, flushed
// with Sync, closed, and then renamed over dst. If any step fails, the
// temporary file is removed and an existing dst is left untouched, so a
// failed or interrupted copy cannot leave a truncated file at dst.
//
// It returns the first error encountered while opening src, creating or
// writing the temporary file, or renaming it into place.
func copyFile(src, dst string) (err error) {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	// Same directory as dst so the final rename stays on one volume.
	tmp, err := os.CreateTemp(filepath.Dir(dst), filepath.Base(dst)+".tmp-*")
	if err != nil {
		return err
	}
	tmpPath := tmp.Name()
	defer func() {
		if err != nil {
			// Best effort: do not leave a partial file behind.
			os.Remove(tmpPath)
		}
	}()

	if _, err = io.Copy(tmp, in); err != nil {
		tmp.Close()
		return err
	}
	// Push the data to disk before the rename makes it visible under dst.
	if err = tmp.Sync(); err != nil {
		tmp.Close()
		return err
	}
	// Explicit Close: a deferred close would silently drop late write
	// errors (e.g. disk full).
	if err = tmp.Close(); err != nil {
		return err
	}
	return os.Rename(tmpPath, dst)
}
// setAutologon writes the Winlogon values that enable automatic logon for
// the adminx9 account with an empty password on the local machine ("."),
// then sets AutoLogonCount to 20.
//
// The string values are written in a fixed order; the first failing write
// aborts the rest and is returned wrapped with the value name.
func setAutologon() error {
	key, err := registry.OpenKey(registry.LOCAL_MACHINE, winlogonKey, registry.SET_VALUE)
	if err != nil {
		return err
	}
	defer key.Close()

	values := [...][2]string{
		{"AutoAdminLogon", "1"},
		{"DefaultUserName", adminUser},
		{"DefaultPassword", ""},
		{"DefaultDomainName", "."},
	}
	for i := range values {
		name, data := values[i][0], values[i][1]
		if setErr := key.SetStringValue(name, data); setErr != nil {
			return fmt.Errorf("set %s: %w", name, setErr)
		}
	}

	// Safety cap: self-limits even if cleanup task fails to run
	return key.SetDWordValue("AutoLogonCount", 20)
}
// disableAutologon switches automatic logon off in the Winlogon key: it sets
// AutoAdminLogon to "0" and deletes the DefaultPassword and AutoLogonCount
// values.
//
// All three steps are always attempted, even if an earlier one fails. A
// value that is already absent when deleted is not treated as a failure.
// The first real failure is returned, wrapped with the operation that
// produced it, so the caller can report that autologon may still be active.
// An error opening the key is returned as-is.
func disableAutologon() error {
	k, err := registry.OpenKey(registry.LOCAL_MACHINE, winlogonKey, registry.SET_VALUE)
	if err != nil {
		return err
	}
	defer k.Close()

	var firstErr error
	note := func(op string, opErr error) {
		if opErr != nil && firstErr == nil {
			firstErr = fmt.Errorf("%s: %w", op, opErr)
		}
	}

	note("set AutoAdminLogon", k.SetStringValue("AutoAdminLogon", "0"))
	for _, name := range []string{"DefaultPassword", "AutoLogonCount"} {
		// A missing value already is the desired end state.
		if delErr := k.DeleteValue(name); delErr != registry.ErrNotExist {
			note("delete "+name, delErr)
		}
	}
	return firstErr
}
// registerResumeTask (re)creates the X9-Resume scheduled task through
// PowerShell. The task runs StablePath at logon of the adminx9 user, with
// an interactive logon type, highest run level, a 4 hour execution time
// limit, and new instances ignored while one is running.
//
// Any existing task of the same name is removed first (errors from that
// removal are suppressed). Register-ScheduledTask runs with
// -ErrorAction Stop so that a failed registration is a terminating error
// and PowerShell exits non-zero, which runPS turns into the returned error.
//
// The task name in the script is a literal and must stay equal to TaskName.
func registerResumeTask() error {
	// No arguments needed - xetup detects resume mode via state file presence.
	ps := fmt.Sprintf(`
$action = New-ScheduledTaskAction -Execute '%s'
$trigger = New-ScheduledTaskTrigger -AtLogOn -User '%s'
$settings = New-ScheduledTaskSettingsSet -ExecutionTimeLimit (New-TimeSpan -Hours 4) -MultipleInstances IgnoreNew
$principal = New-ScheduledTaskPrincipal -UserId '%s' -LogonType Interactive -RunLevel Highest
Unregister-ScheduledTask -TaskName 'X9-Resume' -Confirm:$false -ErrorAction SilentlyContinue
Register-ScheduledTask -TaskName 'X9-Resume' -Action $action -Trigger $trigger -Settings $settings -Principal $principal -Force -ErrorAction Stop | Out-Null
`, StablePath, adminUser, adminUser)
	return runPS(ps)
}
// unregisterResumeTask removes the X9-Resume scheduled task through
// PowerShell, without a confirmation prompt and with the cmdlet's error
// output suppressed. It returns whatever runPS reports.
func unregisterResumeTask() error {
	const script = `Unregister-ScheduledTask -TaskName 'X9-Resume' -Confirm:$false -ErrorAction SilentlyContinue`
	return runPS(script)
}
// runPS executes script with powershell.exe in a hidden window and waits
// for it to finish.
//
// PowerShell is started with -NoProfile so that user or machine profile
// scripts cannot alter or slow down the run, with -NonInteractive, and
// with the execution policy bypassed for this invocation.
//
// It returns nil when the process exits successfully. Otherwise the
// returned error wraps the underlying process error, prefixed with the
// trimmed combined stdout/stderr output, or with "powershell.exe" when the
// process produced no output.
func runPS(script string) error {
	cmd := newHiddenCmd("powershell.exe",
		"-NoProfile", "-NonInteractive", "-ExecutionPolicy", "Bypass", "-Command", script)
	out, err := cmd.CombinedOutput()
	if err == nil {
		return nil
	}
	if msg := strings.TrimSpace(string(out)); msg != "" {
		return fmt.Errorf("%s: %w", msg, err)
	}
	// No output to show: avoid an error that starts with a bare ": ".
	return fmt.Errorf("powershell.exe: %w", err)
}
func newHiddenCmd(name string, args ...string) *exec.Cmd {
cmd := exec.Command(name, args...)
cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: true}
return cmd
}