xetup/internal/runner/runner.go
X9 Dev d30767ef8b
Some checks failed
release / build-and-release (push) Failing after 32s
fix: comprehensive reliability and robustness improvements
Critical fixes:
- Fix resume mode: StepsByIDs returned Enabled=false, all resume steps
  would be SKIPPED (deployment could never resume after reboot)
- Add reboot loop protection: per-step retry counter (max 5) prevents
  infinite reboot cycles when a step always exits with code 9
- Block reboot when state.Save() fails in resumePhase (prevents state
  loss leading to full restart from scratch)
- Atomic state file write (write-to-tmp + rename) prevents JSON
  corruption on BSOD/power loss mid-write
- Script watchdog: kills scripts after 30 min of no output (resets on
  each line, so active long-running scripts are never killed)
- Fix copyFile: check Close() error explicitly instead of deferred
  close that silently drops flush errors (e.g. disk full)

High severity:
- Cleanup() now logs errors instead of silently ignoring them
- Email report: 3 retries with backoff + always saves C:\X9\report.html
- Winget parallel jobs: 10 min timeout, kill hung jobs
- UCPD stop verification: 2s wait + state check before PDF association
- Atera installer: /qn -> /qb so MFA window can appear
- GVLK activation: match by EditionID (registry, not localized) instead
  of fragile OS caption string matching

Medium severity:
- Default profile hive unload: retry loop (5 attempts, increasing delay)
- LayoutModification.xml: UTF-8 without BOM (PS 5.1 Set-Content adds BOM)
- Set-Reg SYSTEM task: try/finally ensures temp file + task cleanup
- Windows Update: @($available).Count for PS 5.1 single-result edge case
- config.json: add missing kmsServer field in activation section

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-28 11:49:43 +02:00

437 lines
13 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package runner executes PowerShell deployment scripts and streams log output.
package runner
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"time"
)
// ErrNeedsReboot is the sentinel error runScript returns when a PowerShell
// script exits with code 9, signalling that it completed successfully but
// requires a system reboot before deployment can continue. Run reports such
// a step as "OK" with Result.NeedsReboot set and stops processing further
// steps. Compare with errors.Is.
var ErrNeedsReboot = errors.New("reboot required")
// Step describes a single deployment step.
type Step struct {
// ID is the identifier used to look the step up, e.g. "adminAccount".
ID string
// Num is the display number, e.g. "00".
Num string
// Name is the human-readable step title.
Name string
// ScriptName is the script file name, e.g. "00-admin-account.ps1"; it is
// joined with RunConfig.ScriptsDir to locate the script.
ScriptName string
// Enabled selects whether Run launches the step. Steps with Enabled == false
// are reported as "SKIPPED". AllSteps leaves this field at its zero value.
Enabled bool
}
// AllSteps returns a fresh slice holding every deployment step in execution
// order. The order is deliberate and differs from the numeric script prefix:
// activation runs early (unlocks features), pcIdentity runs late (the rename
// needs a reboot) and windowsUpdate runs last (reboot cycle).
// Enabled is left false on every returned step.
func AllSteps() []Step {
	steps := make([]Step, 0, 11)
	add := func(id, num, name, script string) {
		steps = append(steps, Step{ID: id, Num: num, Name: name, ScriptName: script})
	}

	add("adminAccount", "00", "Admin ucet", "00-admin-account.ps1")
	add("activation", "08", "Windows aktivace", "08-activation.ps1")
	add("bloatware", "01", "Bloatware removal", "01-bloatware.ps1")
	add("software", "02", "Software (winget)", "02-software.ps1")
	add("systemRegistry", "03", "System Registry (HKLM)", "03-system-registry.ps1")
	add("defaultProfile", "04", "Profil + personalizace", "04-default-profile.ps1")
	add("backinfo", "07", "BackInfo", "07-backinfo.ps1")
	add("network", "10", "Network discovery", "10-network.ps1")
	add("dellUpdate", "11", "Dell Command | Update", "11-dell-update.ps1")
	add("pcIdentity", "09", "PC identita", "09-pc-identity.ps1")
	add("windowsUpdate", "12", "Windows Update", "12-windows-update.ps1")

	return steps
}
// Feature is a single toggleable sub-item within a deployment step.
type Feature struct {
// ID identifies the feature within its step; AllSelectableItems combines it
// with the step ID into a "stepID.featureID" key.
ID string
// Label is the text shown for the feature in the checklist.
Label string
}
// StepFeatures returns a new map from step ID to that step's toggleable
// features. A step that has no entry in the map has no sub-features and is
// controlled at the step level only.
func StepFeatures() map[string][]Feature {
	byStep := make(map[string][]Feature, 4)

	byStep["software"] = []Feature{
		{ID: "wingetInstalls", Label: "Instalace SW ze seznamu (winget)"},
		{ID: "pdfDefault", Label: "Adobe Reader jako vychozi PDF"},
		{ID: "ateraAgent", Label: "Atera RMM agent"},
	}

	byStep["systemRegistry"] = []Feature{
		{ID: "systemTweaks", Label: "Windows tweaky (Widgets, GameDVR, Recall...)"},
		{ID: "edgePolicies", Label: "Edge policies (tlacitka, vyhledavac, telemetrie)"},
		{ID: "oneDriveUninstall", Label: "OneDrive uninstall (consumer pre-install)"},
		{ID: "powercfg", Label: "Nastaveni napajeni (timeout AC/DC)"},
		{ID: "proxyDisable", Label: "Zakaz WPAD proxy auto-detect"},
	}

	byStep["defaultProfile"] = []Feature{
		{ID: "taskbarTweaks", Label: "Taskbar zarovnani, tlacitka, layout XML"},
		{ID: "startMenuTweaks", Label: "Start menu cisteni pinu, Bing, Copilot"},
		{ID: "explorerTweaks", Label: "Explorer pripony, LaunchTo, ShowRecent"},
	}

	byStep["dellUpdate"] = []Feature{
		{ID: "drivers", Label: "Dell drivery + firmware"},
		{ID: "bios", Label: "Dell BIOS update"},
	}

	return byStep
}
// SelectableItem is a single toggleable row in the TUI checklist.
// It represents either a whole step (FeatureID == "") or a specific feature.
type SelectableItem struct {
// Key is "stepID" for a step-level row or "stepID.featureID" for a feature row.
Key string
// StepID is the ID of the step the row belongs to.
StepID string
// FeatureID is the feature's ID; empty for step-level items.
FeatureID string
// Label is the row text: the step number followed by the step name or the
// feature label.
Label string
// Num is the owning step's display number.
Num string
}
// AllSelectableItems flattens AllSteps into the ordered rows of the TUI
// checklist. A step listed in StepFeatures contributes one row per feature
// (keyed "stepID.featureID"); any other step contributes a single step-level
// row keyed by its ID. Every label is prefixed with the step's display number.
func AllSelectableItems() []SelectableItem {
	featuresByStep := StepFeatures()

	var rows []SelectableItem
	for _, step := range AllSteps() {
		stepFeatures, expanded := featuresByStep[step.ID]
		if !expanded {
			// No sub-features: the step itself is the toggle.
			rows = append(rows, SelectableItem{
				Key:    step.ID,
				StepID: step.ID,
				Label:  step.Num + " " + step.Name,
				Num:    step.Num,
			})
			continue
		}

		for _, feat := range stepFeatures {
			rows = append(rows, SelectableItem{
				Key:       step.ID + "." + feat.ID,
				StepID:    step.ID,
				FeatureID: feat.ID,
				Label:     step.Num + " " + feat.Label,
				Num:       step.Num,
			})
		}
	}
	return rows
}
// RunConfig holds runtime parameters passed to each script.
type RunConfig struct {
// ScriptsDir is the directory containing the step scripts; each step's
// ScriptName is joined onto it.
ScriptsDir string
// ConfigPath is forwarded to scripts as -ConfigPath; omitted when empty.
ConfigPath string
// LogFile is forwarded to every script as -LogFile.
LogFile string
// ProfileType is forwarded as -ProfileType, but only to the "defaultProfile"
// step and only when non-empty.
ProfileType string
}
// Result is the outcome of a single step.
type Result struct {
// Step is the step this result belongs to.
Step Step
// Status is one of "OK", "ERROR", "SKIPPED", "CANCELLED".
Status string
// Elapsed is the wall-clock time spent running the step's script; it is zero
// for skipped steps.
Elapsed time.Duration
// NeedsReboot is true when the script exited with code 9. Status is "OK" in
// that case.
NeedsReboot bool
}
// LogLine is a single output line from a running script.
type LogLine struct {
// StepID is the ID of the step whose script produced the line.
StepID string
// Text is the raw line as read from the script's output.
Text string
// Level is INFO, OK, ERROR, WARN or STEP, parsed from the "[LEVEL]" marker in
// the line; lines without a recognised marker are INFO.
Level string
}
// Runner executes deployment steps sequentially.
type Runner struct {
// cfg holds the script directory, config path, log file and profile type.
cfg RunConfig
// onLog receives every output line that is not filtered as noise.
onLog func(LogLine)
// onStepStart, when non-nil, is called just before a step's script starts.
onStepStart func(Step)
// onResult receives the result of every step, including skipped ones.
onResult func(Result)
// cancel is set by Run and invoked by Stop to cancel the current run; it is
// nil until Run has been called.
cancel context.CancelFunc
}
// New builds a Runner from cfg and the given callbacks.
//
// onLog is invoked for every script output line that is not filtered as
// noise, and onResult once per step (including skipped ones); both are called
// without a nil check. onStepStart is optional (may be nil) and, when set, is
// invoked immediately before a step's script is launched.
func New(cfg RunConfig, onLog func(LogLine), onStepStart func(Step), onResult func(Result)) *Runner {
	r := new(Runner)
	r.cfg = cfg
	r.onLog = onLog
	r.onStepStart = onStepStart
	r.onResult = onResult
	return r
}
// Run executes the enabled steps one after another and returns one Result per
// processed step. It blocks until all steps are done, the context is
// cancelled, or a step requests a reboot.
//
// Disabled steps are reported as "SKIPPED" without being launched. A step
// whose script exits with code 9 is reported as "OK" with NeedsReboot set and
// ends the run; steps after it are neither executed nor reported. A failure
// observed while the context is cancelled is reported as "CANCELLED", any
// other failure as "ERROR" (the run then continues with the next step).
func (r *Runner) Run(ctx context.Context, steps []Step) []Result {
	runCtx, stop := context.WithCancel(ctx)
	r.cancel = stop // lets Stop abort this run
	defer stop()

	// Scripts load the JSON config themselves; only its path is handed over.
	cfgArg := r.cfg.ConfigPath

	var results []Result
	report := func(res Result) {
		r.onResult(res)
		results = append(results, res)
	}

	for _, step := range steps {
		if !step.Enabled {
			report(Result{Step: step, Status: "SKIPPED"})
			continue
		}
		if r.onStepStart != nil {
			r.onStepStart(step)
		}

		began := time.Now()
		err := r.runScript(runCtx, step, cfgArg)
		res := Result{Step: step, Status: "OK", Elapsed: time.Since(began)}

		switch {
		case err == nil:
			// Plain success.
		case errors.Is(err, ErrNeedsReboot):
			// The step completed; a reboot is only required to continue.
			res.NeedsReboot = true
		case runCtx.Err() != nil:
			res.Status = "CANCELLED"
		default:
			res.Status = "ERROR"
		}
		report(res)

		if runCtx.Err() != nil || res.NeedsReboot {
			break
		}
	}
	return results
}
// Stop cancels the deployment started by Run by cancelling its context.
// It does nothing when Run has not been called yet.
func (r *Runner) Stop() {
	if r.cancel == nil {
		return
	}
	r.cancel()
}
// silenceTimeout is how long a script may produce no output before the
// watchdog in runScript kills it. The watchdog timer is reset on every output
// line, so scripts that keep producing output are never killed by it.
const silenceTimeout = 30 * time.Minute
// runScript launches the PowerShell script of a single step, streams its
// merged stdout/stderr to onLog line by line and waits for it to finish.
//
// cfgArg is forwarded as -ConfigPath when non-empty. The "defaultProfile"
// step additionally receives -ProfileType when one is configured.
//
// It returns nil on exit code 0, ErrNeedsReboot on exit code 9, and otherwise
// the error from starting or waiting for the process. Cancelling ctx kills
// the process (exec.CommandContext).
func (r *Runner) runScript(ctx context.Context, step Step, cfgArg string) error {
	// maxLineBytes caps the length of one output line. bufio.Scanner's default
	// limit is 64 KiB; a longer line would abort the read loop while the
	// script is still writing.
	const maxLineBytes = 1 << 20

	scriptPath := filepath.Join(r.cfg.ScriptsDir, step.ScriptName)

	// Build argument list.
	args := []string{
		"-NonInteractive",
		"-ExecutionPolicy", "Bypass",
		"-File", scriptPath,
		"-LogFile", r.cfg.LogFile,
	}
	// Pass config path - script loads JSON itself via common.ps1 Load-Config.
	if cfgArg != "" {
		args = append(args, "-ConfigPath", cfgArg)
	}
	// ProfileType for step 04.
	if step.ID == "defaultProfile" && r.cfg.ProfileType != "" {
		args = append(args, "-ProfileType", r.cfg.ProfileType)
	}

	cmd := exec.CommandContext(ctx, "powershell.exe", args...)
	hideWindow(cmd) // prevent PS console window from appearing over the GUI

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return fmt.Errorf("stdout pipe for %s: %w", step.ScriptName, err)
	}
	// StdoutPipe stored the pipe's write end in cmd.Stdout; reusing it for
	// Stderr merges both streams into the single reader.
	cmd.Stderr = cmd.Stdout
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("start %s: %w", step.ScriptName, err)
	}

	// Watchdog: kill script if it produces no output for silenceTimeout.
	// The timer is reset on every output line, so active scripts run
	// indefinitely (e.g. Dell BIOS download producing progress dots).
	watchdog := time.AfterFunc(silenceTimeout, func() {
		r.onLog(LogLine{
			StepID: step.ID,
			Text:   fmt.Sprintf("[WATCHDOG] No output for %v - killing script", silenceTimeout),
			Level:  "ERROR",
		})
		if cmd.Process != nil {
			// Best effort: the process may already have exited.
			_ = cmd.Process.Kill()
		}
	})
	defer watchdog.Stop()

	scanner := bufio.NewScanner(stdout)
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)
	for scanner.Scan() {
		watchdog.Reset(silenceTimeout)
		line := scanner.Text()
		if skipPSNoiseLine(line) {
			continue
		}
		r.onLog(LogLine{
			StepID: step.ID,
			Text:   line,
			Level:  parseLevel(line),
		})
	}

	// Scan also stops on a read error or an over-long line. The script may
	// still be writing then; if nobody reads, it blocks on the full pipe and
	// Wait below would stall until the watchdog fires. Report the problem and
	// keep draining (without logging) until the pipe is closed.
	if scanErr := scanner.Err(); scanErr != nil {
		r.onLog(LogLine{
			StepID: step.ID,
			Text:   fmt.Sprintf("[RUNNER] Reading script output failed: %v - remaining output is discarded", scanErr),
			Level:  "WARN",
		})
		discard := make([]byte, 32*1024)
		for {
			n, readErr := stdout.Read(discard)
			if n > 0 {
				watchdog.Reset(silenceTimeout) // still producing output
			}
			if readErr != nil {
				break
			}
		}
	}

	waitErr := cmd.Wait()
	if waitErr != nil {
		var exitErr *exec.ExitError
		if errors.As(waitErr, &exitErr) && exitErr.ExitCode() == 9 {
			return ErrNeedsReboot
		}
	}
	return waitErr
}
// skipPSNoiseLine reports whether line is PowerShell noise that should be kept
// out of the log. After trimming surrounding whitespace a line is noise when
// it is:
//   - empty;
//   - an error-block continuation, i.e. it starts with '+' (this includes
//     "+ CategoryInfo", "+ FullyQualifiedErrorId", "+ PositionMessage") or '~';
//   - an error position line starting with "At line:";
//   - a "VERBOSE:" or "DEBUG:" stream line.
func skipPSNoiseLine(line string) bool {
	text := strings.TrimSpace(line)
	if text == "" {
		return true
	}

	switch text[0] {
	case '+', '~':
		return true
	}

	noisyPrefixes := [...]string{"At line:", "VERBOSE:", "DEBUG:"}
	for _, p := range noisyPrefixes {
		if strings.HasPrefix(text, p) {
			return true
		}
	}
	return false
}
// parseLevel derives the log level of a line formatted as
// "[HH:mm:ss] [LEVEL] message". It looks for the marker "] [LEVEL]" anywhere
// in the line, trying OK, ERROR, WARN and STEP in that order, and returns the
// first level found. Lines without any of these markers are "INFO".
func parseLevel(line string) string {
	for _, level := range [...]string{"OK", "ERROR", "WARN", "STEP"} {
		if strings.Contains(line, "] ["+level+"]") {
			return level
		}
	}
	return "INFO"
}
// ExtractScripts copies every regular file found directly under "scripts" in
// fs into tmpDir/scripts, creating that directory if needed. Sub-directories
// of "scripts" are skipped.
//
// It returns only an error; the destination is always
// filepath.Join(tmpDir, "scripts"). Nothing is removed here, neither on
// success nor on failure.
func ExtractScripts(fs interface{ ReadDir(string) ([]os.DirEntry, error); ReadFile(string) ([]byte, error) }, tmpDir string) error {
	entries, err := fs.ReadDir("scripts")
	if err != nil {
		return fmt.Errorf("read embedded scripts: %w", err)
	}

	scriptsDir := filepath.Join(tmpDir, "scripts")
	if err := os.MkdirAll(scriptsDir, 0755); err != nil {
		return fmt.Errorf("create scripts dir %q: %w", scriptsDir, err)
	}

	for _, e := range entries {
		if e.IsDir() {
			continue
		}
		// embed.FS always uses forward slashes regardless of OS, hence
		// path.Join for the source and filepath.Join for the destination.
		data, err := fs.ReadFile(path.Join("scripts", e.Name()))
		if err != nil {
			return fmt.Errorf("read embedded script %q: %w", e.Name(), err)
		}
		dst := filepath.Join(scriptsDir, e.Name())
		if err := os.WriteFile(dst, data, 0644); err != nil {
			return fmt.Errorf("write script %q: %w", dst, err)
		}
	}
	return nil
}
// ExtractAssets unpacks the embedded "assets" tree to tmpDir/assets by
// delegating to extractDir, and returns whatever error extractDir reports.
func ExtractAssets(fs interface{ ReadDir(string) ([]os.DirEntry, error); ReadFile(string) ([]byte, error) }, tmpDir string) error {
return extractDir(fs, "assets", tmpDir)
}
// extractDir recursively copies the directory src of fs (slash-separated
// path) to the same relative location below dstBase on disk. Directories are
// created with mode 0755 and files written with mode 0644. The first error
// encountered is returned unchanged and aborts the copy.
func extractDir(fs interface{ ReadDir(string) ([]os.DirEntry, error); ReadFile(string) ([]byte, error) }, src, dstBase string) error {
	children, err := fs.ReadDir(src)
	if err != nil {
		return err
	}

	// Create the directory itself so that empty directories are reproduced too.
	if err := os.MkdirAll(filepath.Join(dstBase, filepath.FromSlash(src)), 0755); err != nil {
		return err
	}

	for _, child := range children {
		// embed.FS always uses forward slashes regardless of OS.
		embedded := path.Join(src, child.Name())

		if child.IsDir() {
			if err := extractDir(fs, embedded, dstBase); err != nil {
				return err
			}
			continue
		}

		content, err := fs.ReadFile(embedded)
		if err != nil {
			return err
		}
		target := filepath.Join(dstBase, filepath.FromSlash(embedded))
		if err := os.WriteFile(target, content, 0644); err != nil {
			return err
		}
	}
	return nil
}
// StepsByIDs returns the steps of AllSteps() whose IDs appear in ids. The
// result follows the canonical AllSteps() order regardless of the order of
// ids; unknown and duplicate IDs are ignored. The result is nil when nothing
// matches.
//
// The steps are returned exactly as AllSteps() produces them, so their
// Enabled field is false; a caller that passes them to Run has to enable
// them first or they will be reported as skipped.
func StepsByIDs(ids []string) []Step {
	wanted := make(map[string]struct{}, len(ids))
	for _, id := range ids {
		wanted[id] = struct{}{}
	}

	var selected []Step
	for _, step := range AllSteps() {
		if _, ok := wanted[step.ID]; !ok {
			continue
		}
		selected = append(selected, step)
	}
	return selected
}
// StepByID looks up a step of AllSteps() by its ID. It returns the step and
// true when found, or the zero Step and false otherwise.
func StepByID(id string) (Step, bool) {
	all := AllSteps()
	for i := range all {
		if all[i].ID == id {
			return all[i], true
		}
	}
	return Step{}, false
}
// WriteConfig serialises cfg as indented JSON into the file
// "config-runtime.json" inside tmpDir and returns that file's path.
//
// On any failure (cfg cannot be marshalled, or the file cannot be written)
// the returned path is empty and the error wraps the underlying cause.
// tmpDir must already exist; it is not created here.
func WriteConfig(cfg interface{}, tmpDir string) (string, error) {
	data, err := json.MarshalIndent(cfg, "", " ")
	if err != nil {
		return "", fmt.Errorf("marshal runtime config: %w", err)
	}

	// Named cfgPath so the imported "path" package is not shadowed.
	cfgPath := filepath.Join(tmpDir, "config-runtime.json")
	if err := os.WriteFile(cfgPath, data, 0644); err != nil {
		return "", fmt.Errorf("write runtime config %q: %w", cfgPath, err)
	}
	return cfgPath, nil
}