diff --git a/config/config.json b/config/config.json index ef5c37b..58e1865 100644 --- a/config/config.json +++ b/config/config.json @@ -9,7 +9,8 @@ "username": "adminx9" }, "activation": { - "productKey": "" + "productKey": "", + "kmsServer": "" }, "software": { "install": [ diff --git a/internal/gui/gui.go b/internal/gui/gui.go index 1e95959..07aaaa9 100644 --- a/internal/gui/gui.go +++ b/internal/gui/gui.go @@ -70,16 +70,50 @@ func Run(cfg config.Config, runCfg runner.RunConfig, cfgPath string) { // Resume mode // -------------------------------------------------------------------------- +// maxStepRetries is the maximum number of reboot cycles a single step is +// allowed before it is marked as ERROR and skipped. +const maxStepRetries = 5 + func resumePhase(st *state.State, runCfg runner.RunConfig) { runCfg.LogFile = st.LogFile + // Initialise retry counts (backward-compat with older state files) + if st.RetryCounts == nil { + st.RetryCounts = make(map[string]int) + } + steps := runner.StepsByIDs(st.PendingSteps) + // StepsByIDs returns Enabled=false; resume steps must be enabled. + for i := range steps { + steps[i].Enabled = true + } + + // Check retry limits — skip steps that have been retried too many times + for i := range steps { + if st.RetryCounts[steps[i].ID] >= maxStepRetries { + steps[i].Enabled = false + st.Results = append(st.Results, state.StepResult{ + StepID: steps[i].ID, + Num: steps[i].Num, + Name: steps[i].Name, + Status: "ERROR", + }) + } + } + results, needsReboot := runPhase(runCfg, steps, true) // Accumulate completed results (NeedsReboot step excluded – runs again) newResults := append(st.Results, toStateResults(results)...) 
//nolint:gocritic if needsReboot { + // Increment retry counts for steps that requested reboot + for _, r := range results { + if r.NeedsReboot { + st.RetryCounts[r.Step.ID]++ + } + } + // Update state and reboot – infrastructure already in place (running as adminx9) pending := pendingStepIDs(steps, results) newSt := &state.State{ @@ -88,9 +122,11 @@ func resumePhase(st *state.State, runCfg runner.RunConfig) { LogFile: st.LogFile, PendingSteps: pending, Results: newResults, + RetryCounts: st.RetryCounts, } if err := state.Save(newSt); err != nil { walk.MsgBox(nil, "Chyba", "Nelze ulozit stav: "+err.Error(), walk.MsgBoxIconError) + return // do NOT reboot if state was not persisted } reboot() return @@ -636,12 +672,9 @@ func donePhase(currentResults []runner.Result, prevResults []state.StepResult) { summaryText := fmt.Sprintf("OK: %d CHYBY: %d PRESKOCENO: %d", ok, errs, skipped) - // Send email report (non-blocking, best-effort) + // Send email report (non-blocking; report.Send retries and saves local copy) go func() { - if err := report.Send(emailRows); err != nil { - // Log but don't block - deployment is done - _ = err - } + _ = report.Send(emailRows) }() cancelReboot := make(chan struct{}) diff --git a/internal/prereboot/prereboot_windows.go b/internal/prereboot/prereboot_windows.go index 5f44fd9..be378a6 100644 --- a/internal/prereboot/prereboot_windows.go +++ b/internal/prereboot/prereboot_windows.go @@ -61,9 +61,15 @@ func Prepare() error { // Cleanup disables autologon and removes the X9-Resume scheduled task. // Called when all deployment steps have completed successfully. +// Errors are logged to stderr (visible in Deploy.log via runner) but do not +// block completion — deployment is already done at this point. 
func Cleanup() { - _ = disableAutologon() - _ = unregisterResumeTask() + if err := disableAutologon(); err != nil { + fmt.Fprintf(os.Stderr, "[WARN] Cleanup: disableAutologon failed: %v\n", err) + } + if err := unregisterResumeTask(); err != nil { + fmt.Fprintf(os.Stderr, "[WARN] Cleanup: unregisterResumeTask failed: %v\n", err) + } } // ensureAdminx9User creates the adminx9 local account if absent. @@ -120,9 +126,13 @@ func copyFile(src, dst string) error { if err != nil { return err } - defer w.Close() - _, err = io.Copy(w, r) - return err + if _, err = io.Copy(w, r); err != nil { + w.Close() + return err + } + // Explicit Close: on Windows the actual flush happens here. + // A deferred close would silently drop write errors (e.g. disk full). + return w.Close() } func setAutologon() error { diff --git a/internal/report/report.go b/internal/report/report.go index 7db0b7e..9225c9c 100644 --- a/internal/report/report.go +++ b/internal/report/report.go @@ -5,6 +5,7 @@ import ( "fmt" "net/smtp" "os" + "path/filepath" "strings" "time" ) @@ -19,6 +20,9 @@ const ( mailTo = "net@x9.cz" ) +// localReportPath is where a local HTML copy of the report is always saved. +const localReportPath = `C:\X9\report.html` + // StepResult holds one row of the deployment report. type StepResult struct { Num string @@ -27,8 +31,9 @@ type StepResult struct { Elapsed time.Duration } -// Send emails the deployment report. Non-fatal: returns error but caller -// should log it and continue (deployment is already done). +// Send builds the deployment report, saves a local HTML copy to C:\X9\, +// and emails it via SMTP with retries. Returns the last SMTP error if all +// attempts fail (the local copy is attempted first either way; a failed write is only logged as a warning).
func Send(results []StepResult) error { hostname, _ := os.Hostname() now := time.Now().Format("2006-01-02 15:04") @@ -36,6 +41,31 @@ func Send(results []StepResult) error { subject := fmt.Sprintf("xetup report %s", hostname) body := buildHTML(results, hostname, now) + // Always save local copy so technician has a record even if SMTP fails + _ = os.MkdirAll(filepath.Dir(localReportPath), 0755) + if err := os.WriteFile(localReportPath, []byte(body), 0644); err != nil { + fmt.Fprintf(os.Stderr, "[WARN] Failed to save local report: %v\n", err) + } + + // Try SMTP up to 3 times: immediately, then after a 1s wait, then after a further 5s wait + delays := []time.Duration{0, 1 * time.Second, 5 * time.Second} + var lastErr error + for attempt, delay := range delays { + if delay > 0 { + time.Sleep(delay) + } + if err := sendMail(subject, body); err != nil { + lastErr = err + fmt.Fprintf(os.Stderr, "[WARN] Email attempt %d/3 failed: %v\n", attempt+1, err) + continue + } + return nil + } + fmt.Fprintf(os.Stderr, "[ERROR] All email attempts failed. Local copy saved: %s\n", localReportPath) + return lastErr +} + +func sendMail(subject, body string) error { msg := strings.Join([]string{ "From: " + mailFrom, "To: " + mailTo, diff --git a/internal/runner/runner.go b/internal/runner/runner.go index 1628fc7..b0d78bd 100644 --- a/internal/runner/runner.go +++ b/internal/runner/runner.go @@ -217,6 +217,10 @@ func (r *Runner) Stop() { } } +// silenceTimeout is how long a script may produce no output before the +// watchdog kills it. Active scripts (producing output) are never killed. +const silenceTimeout = 30 * time.Minute + func (r *Runner) runScript(ctx context.Context, step Step, cfgArg string) error { scriptPath := filepath.Join(r.cfg.ScriptsDir, step.ScriptName) @@ -251,8 +255,24 @@ func (r *Runner) runScript(ctx context.Context, step Step, cfgArg string) error return err } + // Watchdog: kill script if it produces no output for silenceTimeout.
+ // The timer is reset on every output line, so active scripts run + // indefinitely (e.g. Dell BIOS download producing progress dots). + watchdog := time.AfterFunc(silenceTimeout, func() { + r.onLog(LogLine{ + StepID: step.ID, + Text: fmt.Sprintf("[WATCHDOG] No output for %v - killing script", silenceTimeout), + Level: "ERROR", + }) + if cmd.Process != nil { + cmd.Process.Kill() + } + }) + defer watchdog.Stop() + scanner := bufio.NewScanner(stdout) for scanner.Scan() { + watchdog.Reset(silenceTimeout) line := scanner.Text() if skipPSNoiseLine(line) { continue diff --git a/internal/state/state.go b/internal/state/state.go index f714d76..a36138a 100644 --- a/internal/state/state.go +++ b/internal/state/state.go @@ -25,11 +25,12 @@ type StepResult struct { // State holds everything xetup needs to resume after a reboot. type State struct { - Config config.Config `json:"config"` - ConfigPath string `json:"configPath"` // path to original config.json - LogFile string `json:"logFile"` - PendingSteps []string `json:"pendingSteps"` // step IDs to run next, in canonical order - Results []StepResult `json:"results"` // accumulated across all rounds + Config config.Config `json:"config"` + ConfigPath string `json:"configPath"` // path to original config.json + LogFile string `json:"logFile"` + PendingSteps []string `json:"pendingSteps"` // step IDs to run next, in canonical order + Results []StepResult `json:"results"` // accumulated across all rounds + RetryCounts map[string]int `json:"retryCounts,omitempty"` // per-step reboot retry counter } // Load reads the state file. Returns a non-nil error when the file is absent. @@ -45,17 +46,24 @@ func Load() (*State, error) { return &s, nil } -// Save writes the state file, creating parent directories as needed. +// Save writes the state file atomically (write-to-temp + rename), creating +// parent directories as needed. This prevents corruption if the system +// crashes mid-write (e.g. BSOD, power loss). 
func Save(s *State) error { p := statePath() - if err := os.MkdirAll(filepath.Dir(p), 0755); err != nil { + dir := filepath.Dir(p) + if err := os.MkdirAll(dir, 0755); err != nil { return err } data, err := json.MarshalIndent(s, "", " ") if err != nil { return err } - return os.WriteFile(p, data, 0644) + tmp := p + ".tmp" + if err := os.WriteFile(tmp, data, 0644); err != nil { + return err + } + return os.Rename(tmp, p) } // Delete removes the state file. Silently ignores not-found. diff --git a/scripts/02-software.ps1 b/scripts/02-software.ps1 index 216493d..6989b3f 100644 --- a/scripts/02-software.ps1 +++ b/scripts/02-software.ps1 @@ -14,7 +14,7 @@ 7-zip-7zip-7zip: Installs 7-Zip (winget ID: 7zip.7zip). Used for archive management. Silent install with --accept-package-agreements --accept-source-agreements flags required for unattended deployment. adobe-acrobat-reader-64-bit-adobe-acroba: Installs Adobe Acrobat Reader DC 64-bit (Adobe.Acrobat.Reader.64-bit). Required as the default PDF viewer to prevent Edge from handling PDFs in browser mode, which limits functionality. openvpn-connect-openvpntechnologies-open: Installs OpenVPN Connect client. Used for client VPN access when the client network requires a VPN. The ovpn profile and credentials are configured separately per client. - atera-agent-install: Atera RMM agent downloaded from x9.servicedesk.atera.com and installed via msiexec /qn. During install, Atera MSI shows an interactive MFA window - technician enters the code to complete registration. Agent enables MSP monitoring, remote access, and ticketing integration. + atera-agent-install: Atera RMM agent downloaded from x9.servicedesk.atera.com and installed via msiexec /qb. During install, Atera MSI shows an interactive MFA window - technician enters the code to complete registration. Agent enables MSP monitoring, remote access, and ticketing integration. 
adobe-pdf-default-pdf-acrord32-po-instal: Sets .pdf -> AcroRd32 file association after Acrobat install via HKCR (system-wide, no UserChoice hash issue). UCPD driver is stopped immediately before the write and restarted after to ensure the association persists across Edge updates. ucpd-sys-kernel-driver-od-feb-2024-bloku: UCPD.sys (User Choice Protection Driver) is stopped before the PDF association write and restarted after. Pattern: Stop-Service ucpd -> set HKCR\.pdf -> Start-Service ucpd. Implemented in this script. #> @@ -90,7 +90,15 @@ if (Get-Feature $Config "software" "wingetInstalls") { # Wait for all jobs and collect results Write-Log " Waiting for $($jobs.Count) installs to complete..." -Level INFO - $jobs | Wait-Job | Out-Null + $jobs | Wait-Job -Timeout 600 | Out-Null + + # Kill any jobs that are still running after timeout + foreach ($job in $jobs) { + if ($job.State -eq "Running") { + Write-Log " Timeout: $($job.Name) - killing" -Level ERROR + Stop-Job -Job $job + } + } foreach ($job in $jobs) { $r = Receive-Job -Job $job @@ -124,8 +132,15 @@ if (Get-Feature $Config "software" "pdfDefault") { if ($ucpdSvc) { try { Stop-Service -Name "ucpd" -Force -ErrorAction Stop - $ucpdStopped = $true - Write-Log " UCPD driver stopped" -Level OK + # Wait for the service to fully stop before writing association + Start-Sleep -Seconds 2 + $svcState = (Get-Service -Name "ucpd" -ErrorAction SilentlyContinue).Status + if ($svcState -eq "Stopped") { + $ucpdStopped = $true + Write-Log " UCPD driver stopped" -Level OK + } else { + Write-Log " UCPD still in state '$svcState' - association may not persist" -Level WARN + } } catch { Write-Log " Could not stop UCPD: $_ (association may not persist on some builds)" -Level WARN @@ -188,7 +203,7 @@ if (Get-Feature $Config "software" "pdfDefault") { # ----------------------------------------------------------------------- # Install Atera RMM Agent -# Download MSI from Atera dashboard API, install via msiexec /qn. 
+# Download MSI from Atera dashboard API, install via msiexec /qb. # During install, the Atera MSI shows an interactive MFA window - # the technician enters the code to complete agent registration. # ----------------------------------------------------------------------- @@ -204,7 +219,7 @@ if (Get-Feature $Config "software" "ateraAgent") { Write-Log " Download complete" -Level OK Write-Log " Running installer (MFA window will appear)..." -Level INFO - $msiProc = Start-Process msiexec -ArgumentList "/i `"$ateraMsi`" /qn" -Wait -PassThru + $msiProc = Start-Process msiexec -ArgumentList "/i `"$ateraMsi`" /qb" -Wait -PassThru if ($msiProc.ExitCode -eq 0) { Write-Log " Atera agent installed (msiexec exit 0)" -Level OK } else { diff --git a/scripts/03-system-registry.ps1 b/scripts/03-system-registry.ps1 index 01df908..795c3ff 100644 --- a/scripts/03-system-registry.ps1 +++ b/scripts/03-system-registry.ps1 @@ -141,6 +141,8 @@ function Set-Reg { # Retry 2: write via scheduled task running as SYSTEM # SYSTEM has full registry access regardless of key ACL + $tempScript = $null + $taskName = $null try { $regType = switch ($Type) { "DWord" { "REG_DWORD" } @@ -168,8 +170,6 @@ function Set-Reg { Register-ScheduledTask -TaskName $taskName -InputObject $task -Force | Out-Null Start-ScheduledTask -TaskName $taskName Start-Sleep -Seconds 2 - Unregister-ScheduledTask -TaskName $taskName -Confirm:$false -ErrorAction SilentlyContinue - Remove-Item $tempScript -Force -ErrorAction SilentlyContinue # Verify it was written $written = (Get-ItemProperty -Path $Path -Name $Name -ErrorAction SilentlyContinue).$Name @@ -182,6 +182,14 @@ function Set-Reg { catch { Write-Log " FAILED $Path\$Name - $_" -Level ERROR } + finally { + if ($taskName) { + Unregister-ScheduledTask -TaskName $taskName -Confirm:$false -ErrorAction SilentlyContinue + } + if ($tempScript) { + Remove-Item $tempScript -Force -ErrorAction SilentlyContinue + } + } } } diff --git a/scripts/04-default-profile.ps1 
b/scripts/04-default-profile.ps1 index 943d252..5b6d31e 100644 --- a/scripts/04-default-profile.ps1 +++ b/scripts/04-default-profile.ps1 @@ -236,7 +236,8 @@ $pinList "@ - $taskbarLayoutXml | Set-Content -Path "$taskbarLayoutDir\LayoutModification.xml" -Encoding UTF8 -Force + $utf8NoBom = New-Object System.Text.UTF8Encoding $false + [System.IO.File]::WriteAllText("$taskbarLayoutDir\LayoutModification.xml", $taskbarLayoutXml, $utf8NoBom) Write-Log " Taskbar LayoutModification.xml written (profile: $ProfileType)" -Level OK # NumLock on startup @@ -346,18 +347,26 @@ $pinList } finally { # ----------------------------------------------------------------------- - # Unload Default hive - always, even on error + # Unload Default hive - always, even on error. Retry because GC and + # other processes (antivirus) may hold handles briefly. # ----------------------------------------------------------------------- Write-Log "Unloading Default hive" -Level INFO - [GC]::Collect() - [GC]::WaitForPendingFinalizers() - Start-Sleep -Milliseconds 500 + $unloaded = $false + for ($attempt = 1; $attempt -le 5; $attempt++) { + [GC]::Collect() + [GC]::WaitForPendingFinalizers() + Start-Sleep -Milliseconds ($attempt * 500) - $unloadResult = & reg unload "HKU\$hiveKey" 2>&1 - if ($LASTEXITCODE -eq 0) { - Write-Log "Default hive unloaded" -Level OK - } else { - Write-Log "Failed to unload Default hive: $unloadResult" -Level ERROR + $unloadResult = & reg unload "HKU\$hiveKey" 2>&1 + if ($LASTEXITCODE -eq 0) { + Write-Log "Default hive unloaded (attempt $attempt)" -Level OK + $unloaded = $true + break + } + } + if (-not $unloaded) { + Write-Log "Failed to unload Default hive after 5 attempts: $unloadResult" -Level ERROR + Write-Log " New user profiles may not inherit all settings until next reboot" -Level WARN } } diff --git a/scripts/08-activation.ps1 b/scripts/08-activation.ps1 index e933f96..3b8a6b1 100644 --- a/scripts/08-activation.ps1 +++ b/scripts/08-activation.ps1 @@ -11,7 +11,7 @@ 
.ITEMS oa3-bios-uefi-klic-kontrola-embedded-ke: Checks for OA3 embedded product key in BIOS/UEFI firmware via SoftwareLicensingService.OA3xOriginalProductKey WMI query. If a key is found, it is installed via slmgr /ipk and activation is attempted. Most OEM machines (since Win8 OA3) have a digital entitlement key in firmware - this path handles them without requiring a key in config.json. klic-z-config-json-activation-productkey: Reads activation.productKey from config.json. Installs via slmgr.vbs /ipk and activates via slmgr.vbs /ato. Supports MAK (Multiple Activation Key) for volume licensing without KMS, and retail keys. Takes priority over GVLK fallback. - fallback-na-gvlk-kms-client-key-dle-edic: When no key is in config, detects Windows edition via (Get-WmiObject SoftwareLicensingProduct).Name and maps to Microsoft's published GVLK table. Pro: W269N-WFGWX-YVC9B-4J6C9-T83GX, Enterprise: NPPR9-FWDCX-D2C8J-H872K-2YT43, Home: TX9XD-98N7V-6WMQ6-BX7FG-H8Q99. + fallback-na-gvlk-kms-client-key-dle-edic: When no key is in config, detects Windows edition via EditionID registry value (HKLM:\SOFTWARE\Microsoft\Windows NT\CurrentVersion\EditionID) and maps to Microsoft's published GVLK table. EditionID is not localized, unlike Win32_OperatingSystem.Caption. Professional: W269N-WFGWX-YVC9B-4J6C9-T83GX, Enterprise: NPPR9-FWDCX-D2C8J-H872K-2YT43, Core (Home): TX9XD-98N7V-6WMQ6-BX7FG-H8Q99. volitelny-kms-server-activation-kmsserve: If activation.kmsServer is in config.json, runs slmgr.vbs /skms : before /ato. Used for clients with on-premises KMS infrastructure (common in larger organizations with volume licensing). preskocit-pokud-jiz-aktivovano: Queries Win32_WindowsLicenseStatus or SoftwareLicensingProduct to check LicenseStatus. Value 1 = Licensed (fully activated). Script skips activation attempt and logs "Windows already activated" to avoid unnecessary slmgr calls. 
typ-klice-mak-vs-kms-vs-retail: Key type selection depends on client's Microsoft licensing: MAK = volume license key activates online against Microsoft (limited activations), KMS = requires KMS server on network (VLSC subscription), Retail = individual license from Microsoft Store or OEM. @@ -31,18 +31,15 @@ $Config = Load-Config $ConfigPath # Replace with your MAK/retail key for standalone activation. # ----------------------------------------------------------------------- $KmsKeys = @{ - # Windows 11 - "Windows 11 Pro" = "W269N-WFGWX-YVC9B-4J6C9-T83GX" - "Windows 11 Pro N" = "MH37W-N47XK-V7XM9-C7227-GCQG9" - "Windows 11 Pro Education" = "6TP4R-GNPTD-KYYHQ-7B7DP-J447Y" - "Windows 11 Education" = "NW6C2-QMPVW-D7KKK-3GKT6-VCFB2" - "Windows 11 Enterprise" = "NPPR9-FWDCX-D2C8J-H872K-2YT43" - # Windows 10 - "Windows 10 Pro" = "W269N-WFGWX-YVC9B-4J6C9-T83GX" - "Windows 10 Pro N" = "MH37W-N47XK-V7XM9-C7227-GCQG9" - "Windows 10 Education" = "NW6C2-QMPVW-D7KKK-3GKT6-VCFB2" - "Windows 10 Enterprise" = "NPPR9-FWDCX-D2C8J-H872K-2YT43" - "Windows 10 Home" = "TX9XD-98N7V-6WMQ6-BX7FG-H8Q99" + # EditionID -> GVLK (source: docs.microsoft.com/windows-server/get-started/kms-client-activation-keys) + # Same keys work for both Windows 10 and Windows 11 + "Professional" = "W269N-WFGWX-YVC9B-4J6C9-T83GX" + "ProfessionalN" = "MH37W-N47XK-V7XM9-C7227-GCQG9" + "ProfessionalEducation" = "6TP4R-GNPTD-KYYHQ-7B7DP-J447Y" + "Education" = "NW6C2-QMPVW-D7KKK-3GKT6-VCFB2" + "Enterprise" = "NPPR9-FWDCX-D2C8J-H872K-2YT43" + "Core" = "TX9XD-98N7V-6WMQ6-BX7FG-H8Q99" + "ProfessionalWorkstation" = "NRG8B-VKK3Q-CXVCJ-9G2XF-6Q84J" } # ----------------------------------------------------------------------- @@ -93,14 +90,15 @@ if ($licenseStatus -eq 1) { $keyToUse = $oa3Key Write-Log " Using OA3 key from firmware" -Level INFO } else { - # Find matching GVLK key by OS name + # Find matching GVLK key by EditionID (registry value, not localized) + $editionId = (Get-ItemProperty 
"HKLM:\SOFTWARE\Microsoft\Windows NT\CurrentVersion" -Name EditionID -ErrorAction SilentlyContinue).EditionID + Write-Log " EditionID: $editionId" -Level INFO $keyToUse = $null - foreach ($entry in $KmsKeys.GetEnumerator()) { - if ($osCaption -like "*$($entry.Key)*") { - $keyToUse = $entry.Value - Write-Log " Matched GVLK key for: $($entry.Key)" -Level INFO - break - } + if ($editionId -and $KmsKeys.ContainsKey($editionId)) { + $keyToUse = $KmsKeys[$editionId] + Write-Log " Matched GVLK key for edition: $editionId" -Level INFO + } else { + Write-Log " No GVLK key for edition: $editionId" -Level WARN } } diff --git a/scripts/12-windows-update.ps1 b/scripts/12-windows-update.ps1 index 184f063..c003285 100644 --- a/scripts/12-windows-update.ps1 +++ b/scripts/12-windows-update.ps1 @@ -57,7 +57,7 @@ try { exit 1 } -if (-not $available -or $available.Count -eq 0) { +if (-not $available -or @($available).Count -eq 0) { Write-Log " System is fully up to date" -Level OK Write-Log "Step 12 complete" -Level OK exit 0