Commit 6f73ea6b authored by mashun1's avatar mashun1
Browse files

omniparser

parents
Pipeline #2421 failed with stages
in 0 seconds
#!/usr/bin/env bash
set -Eeuo pipefail

# Configure QEMU for graceful shutdown

QEMU_TERM=""      # assigned by terminal() once the serial device is known
QEMU_PORT=7100    # local port used to talk to the QEMU monitor
QEMU_TIMEOUT=110  # seconds to wait for the guest after a powerdown request

# State files shared between the functions in this script
QEMU_DIR="/run/shm"
QEMU_PID="$QEMU_DIR/qemu.pid"
QEMU_PTY="$QEMU_DIR/qemu.pty"
QEMU_LOG="$QEMU_DIR/qemu.log"
QEMU_OUT="$QEMU_DIR/qemu.out"
QEMU_END="$QEMU_DIR/qemu.end"

# Remove state files left over from a previous run. The glob has to stay
# outside the quotes: a quoted "qemu.*" is taken literally and matches nothing.
rm -f "$QEMU_DIR"/qemu.*
touch "$QEMU_LOG"
# Installs one handler for several signals.
# Arguments:
#   $1   - name of the function (or command) to run
#   $2.. - signals to trap
# The handler is invoked with the signal name as its only argument.
_trap() {
  local func="$1"
  shift

  # Keep the loop variable local so it does not leak into the caller's scope
  local sig
  for sig in "$@"; do
    # Expanded now on purpose: every trap gets its own signal name baked in
    trap "$func $sig" "$sig"
  done
}
# Checks whether the guest has written boot output to $QEMU_PTY.
# Reports success via info() when it has; otherwise reports a timeout via
# error() and sends SIGTERM to the process recorded in $QEMU_PID.
# Returns: always 0
boot() {

  # Nothing to do once a shutdown is in progress
  if [ -f "$QEMU_END" ]; then
    return 0
  fi

  # Only output longer than 7 bytes counts as a sign of life
  if [ -s "$QEMU_PTY" ] && [ "$(stat -c%s "$QEMU_PTY")" -gt 7 ]; then

    local failed=""

    # For legacy boot, known firmware error messages mean the boot failed
    if [[ "${BOOT_MODE,,}" == "windows_legacy" ]]; then
      if grep -Fq "No bootable device." "$QEMU_PTY"; then
        failed="y"
      fi
      if grep -Fq "BOOTMGR is missing" "$QEMU_PTY"; then
        failed="y"
      fi
    fi

    if [ -z "$failed" ]; then
      info "Windows has started successfully. You can directly view the VM at http://localhost:8006/vnc.html?view_only=1&autoconnect=1&resize=scale. Wait until setup is complete before interacting manually."
      return 0
    fi

  fi

  error "Timeout while waiting for QEMU to boot the machine!"

  local qemu_pid
  qemu_pid=$(<"$QEMU_PID")
  # Best effort: the process may already be gone
  { kill -15 "$qemu_pid" || true; } 2>/dev/null

  return 0
}
# Tells whether the guest has reached the installed system.
# Returns: 0 when $STORAGE/windows.boot exists or $QEMU_PTY shows the expected
#          boot marker, 1 otherwise
ready() {

  if [ -f "$STORAGE/windows.boot" ]; then
    return 0
  fi

  if [ ! -s "$QEMU_PTY" ]; then
    return 1
  fi

  if [[ "${BOOT_MODE,,}" != "windows_legacy" ]]; then
    # Non-legacy boot: look for the quoted boot manager name in the output
    local marker="\"Windows Boot Manager\""
    if grep -Fq "$marker" "$QEMU_PTY"; then
      return 0
    fi
    return 1
  fi

  # Legacy boot: the most recent "Booting ..." line must name the hard disk
  local expected="Booting from Hard"
  local latest
  latest=$(grep "^Booting.*" "$QEMU_PTY" | tail -1)

  if [[ "${latest,,}" != "${expected,,}"* ]]; then
    return 1
  fi
  if grep -Fq "No bootable device." "$QEMU_PTY"; then
    return 1
  fi
  if grep -Fq "BOOTMGR is missing" "$QEMU_PTY"; then
    return 1
  fi

  return 0
}
# Terminates QEMU and the helper services, then exits the script.
# Arguments:
#   $1 - exit code (also shown as the reason in the log message)
# Does not return: always ends with `exit "$1"`.
finish() {
  local reason=$1
  local qemu_pid
  local pidfile

  # Mark the shutdown as in progress
  touch "$QEMU_END"

  if [ -s "$QEMU_PID" ]; then

    qemu_pid=$(<"$QEMU_PID")
    error "Forcefully terminating Windows, reason: $reason..."
    { kill -15 "$qemu_pid" || true; } 2>/dev/null

    while isAlive "$qemu_pid"; do
      sleep 1
      # Workaround for zombie pid
      if [ ! -s "$QEMU_PID" ]; then
        break
      fi
    done

  fi

  # Remove CD-ROM ISO after install
  if [ ! -f "$STORAGE/windows.boot" ] && [ -f "$BOOT" ] && ready; then
    touch "$STORAGE/windows.boot"
    if [[ "$REMOVE" != [Nn]* ]]; then
      rm -f "$BOOT" 2>/dev/null || true
    fi
  fi

  # Stop the helper processes that recorded a PID file
  for pidfile in "/var/run/tpm.pid" "/var/run/wsdd.pid"; do
    if [ -s "$pidfile" ]; then
      pKill "$(<"$pidfile")"
    fi
  done

  fKill "smbd"
  closeNetwork

  sleep 0.5
  echo "❯ Shutdown completed!"

  exit "$reason"
}
# Determines the character device of the guest's serial console and stores it
# in QEMU_TERM. The device is taken from $QEMU_OUT when possible, otherwise
# from the reply to 'info chardev' on the monitor port.
# Returns: 0 on success; calls finish 34 when no device can be found
terminal() {
  local pty=""

  if [ -s "$QEMU_OUT" ]; then

    local output
    output=$(<"$QEMU_OUT")

    if [ -n "$output" ]; then

      # Show the output unless it is just the expected "char ... serial0)" line
      if ! [[ "${output,,}" == "char"* && "$output" == *"serial0)" ]]; then
        echo "$output"
      fi

      # Take the first word that starts at "/dev/p"
      pty="${output#*/dev/p}"
      pty="/dev/p${pty%% *}"

    fi
  fi

  if [ ! -c "$pty" ]; then
    # Fall back to the monitor; NUL bytes are stripped from the reply
    pty=$(echo 'info chardev' | nc -q 1 -w 1 localhost "$QEMU_PORT" | tr -d '\000')
    pty="${pty#*serial0}"
    pty="${pty#*pty:}"
    pty="${pty%%$'\n'*}"
    pty="${pty%%$'\r'*}"
  fi

  if [ ! -c "$pty" ]; then
    error "Device '$pty' not found!"
    finish 34 && return 34
  fi

  QEMU_TERM="$pty"
  return 0
}
# Signal handler: asks the guest to power down through the QEMU monitor and
# waits up to QEMU_TIMEOUT seconds before handing over to finish().
# Arguments:
#   $1 - name of the received signal (used in log messages only)
_graceful_shutdown() {
  # Must stay first: captures the status of whatever ran before the handler
  local code=$?

  set +e

  if [ -f "$QEMU_END" ]; then
    info "Received $1 while already shutting down..."
    return
  fi

  touch "$QEMU_END"
  info "Received $1, sending ACPI shutdown signal..."

  if [ ! -s "$QEMU_PID" ]; then
    error "QEMU PID file does not exist?"
    finish "$code" && return "$code"
  fi

  local qemu_pid=""
  qemu_pid=$(<"$QEMU_PID")

  if ! isAlive "$qemu_pid"; then
    error "QEMU process does not exist?"
    finish "$code" && return "$code"
  fi

  if ! ready; then
    info "Cannot send ACPI signal during Windows setup, aborting..."
    finish "$code" && return "$code"
  fi

  # Send ACPI shutdown signal
  echo 'system_powerdown' | nc -q 1 -w 1 localhost "${QEMU_PORT}" > /dev/null

  local elapsed=0

  until [ "$elapsed" -ge "$QEMU_TIMEOUT" ]; do

    sleep 1
    elapsed=$((elapsed+1))

    isAlive "$qemu_pid" || break
    # Workaround for zombie pid
    [ -s "$QEMU_PID" ] || break

    info "Waiting for Windows to shutdown... ($elapsed/$QEMU_TIMEOUT)"

    # Send ACPI shutdown signal
    echo 'system_powerdown' | nc -q 1 -w 1 localhost "${QEMU_PORT}" > /dev/null

  done

  if [ "$elapsed" -ge "$QEMU_TIMEOUT" ]; then
    error "Shutdown timeout reached, aborting..."
  fi

  finish "$code" && return "$code"
}
# Serial console type and monitor arguments; presumably consumed by the code
# that assembles the QEMU command line elsewhere (not visible here).
SERIAL="pty"
MONITOR="telnet:localhost:$QEMU_PORT,server,nowait,nodelay -daemonize -D $QEMU_LOG -pidfile $QEMU_PID"

# Route the termination signals to the graceful shutdown handler
_trap _graceful_shutdown SIGTERM SIGHUP SIGINT SIGABRT SIGQUIT

return 0
#!/usr/bin/env bash
set -Eeuo pipefail

# Samba is enabled unless SAMBA is set to a value starting with N/n
: "${SAMBA:="Y"}"

if [[ "$SAMBA" == [Nn]* ]]; then
  return 0
fi

# No file sharing without networking
if [[ "$NETWORK" == [Nn]* ]]; then
  return 0
fi

# With DHCP the share is announced under the VM's own IP and network device
if [[ "$DHCP" == [Yy1]* ]]; then
  hostname="$IP"
  interface="$VM_NET_DEV"
else
  hostname="host.lan"
  interface="dockerbridge"
fi
# Creates a shared folder and appends its section to /etc/samba/smb.conf.
# Arguments:
#   $1 - directory to share (created when missing)
#   $2 - share name
#   $3 - share comment
# An empty directory is made world-writable and gets a readme.txt (converted
# with unix2dos) that explains how to bind-mount a different folder.
# Returns: 1 when the directory cannot be created, 0 otherwise
addShare() {
  local dir="$1"
  local name="$2"
  local comment="$3"
  local rule="--------------------------------------------------------"

  mkdir -p "$dir" || return 1

  if [ -z "$(ls -A "$dir")" ]; then

    chmod 777 "$dir"

    printf '%s\n' \
      "$rule" \
      " $APP" \
      " For support visit $SUPPORT" \
      "$rule" \
      "" \
      "Using this folder you can share files with the host machine." \
      "" \
      "To change its location, include the following bind mount in your compose file:" \
      "" \
      " volumes:" \
      " - \"/home/example:/${name,,}\"" \
      "" \
      "Or in your run command:" \
      "" \
      " -v \"/home/example:/${name,,}\"" \
      "" \
      "Replace the example path /home/example with the desired shared folder." \
      "" \
      | unix2dos > "$dir/readme.txt"

  fi

  printf '%s\n' \
    "" \
    "[$name]" \
    " path = $dir" \
    " comment = $comment" \
    " writable = yes" \
    " guest ok = yes" \
    " guest only = yes" \
    " force user = root" \
    " force group = root" \
    >> "/etc/samba/smb.conf"

  return 0
}
# Start from a fresh configuration containing only the [global] section
printf '%s\n' \
  "[global]" \
  " server string = Dockur" \
  " netbios name = $hostname" \
  " workgroup = WORKGROUP" \
  " interfaces = $interface" \
  " bind interfaces only = yes" \
  " security = user" \
  " guest account = nobody" \
  " map to guest = Bad User" \
  " server min protocol = NT1" \
  "" \
  " # disable printing services" \
  " load printers = no" \
  " printing = bsd" \
  " printcap name = /dev/null" \
  " disable spoolss = yes" \
  > "/etc/samba/smb.conf"

# Pick the first existing candidate folder; /data is used when none exists
share="/data"
if [ ! -d "$share" ] && [ -d "$STORAGE/data" ]; then
  share="$STORAGE/data"
fi
if [ ! -d "$share" ] && [ -d "/shared" ]; then
  share="/shared"
fi
if [ ! -d "$share" ] && [ -d "$STORAGE/shared" ]; then
  share="$STORAGE/shared"
fi

addShare "$share" "Data" "Shared" || error "Failed to create shared folder!"

# Optional extra shares
[ -d "/data2" ] && addShare "/data2" "Data2" "Shared"
[ -d "/data3" ] && addShare "/data3" "Data3" "Shared"

if ! smbd; then
  error "Samba daemon failed to start!"
  # Run it once more in the foreground so the reason shows up in the output
  smbd -i --debug-stdout || true
fi

if [[ "${BOOT_MODE:-}" == "windows_legacy" ]]; then

  # Enable NetBIOS on Windows 7 and lower
  if ! nmbd; then
    error "NetBIOS daemon failed to start!"
    nmbd -i --debug-stdout || true
  fi

else

  # Enable Web Service Discovery on Vista and up
  wsdd -i "$interface" -p -n "$hostname" &
  # Remember the PID so the daemon can be stopped later
  echo "$!" > /var/run/wsdd.pid

fi

return 0
<?xml version="1.0" encoding="UTF-8"?>
<unattend xmlns="urn:schemas-microsoft-com:unattend" xmlns:wcm="http://schemas.microsoft.com/WMIConfig/2002/State">
<settings pass="windowsPE">
<component name="Microsoft-Windows-International-Core-WinPE" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<SetupUILanguage>
<UILanguage>en-US</UILanguage>
</SetupUILanguage>
<InputLocale>0409:00000409</InputLocale>
<SystemLocale>en-US</SystemLocale>
<UILanguage>en-US</UILanguage>
<UserLocale>en-US</UserLocale>
</component>
<component name="Microsoft-Windows-Setup" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<DiskConfiguration>
<Disk wcm:action="add">
<DiskID>0</DiskID>
<WillWipeDisk>true</WillWipeDisk>
<CreatePartitions>
<!-- System partition (ESP) -->
<CreatePartition wcm:action="add">
<Order>1</Order>
<Type>EFI</Type>
<Size>128</Size>
</CreatePartition>
<!-- Microsoft reserved partition (MSR) -->
<CreatePartition wcm:action="add">
<Order>2</Order>
<Type>MSR</Type>
<Size>128</Size>
</CreatePartition>
<!-- Windows partition -->
<CreatePartition wcm:action="add">
<Order>3</Order>
<Type>Primary</Type>
<Extend>true</Extend>
</CreatePartition>
</CreatePartitions>
<ModifyPartitions>
<!-- System partition (ESP) -->
<ModifyPartition wcm:action="add">
<Order>1</Order>
<PartitionID>1</PartitionID>
<Label>System</Label>
<Format>FAT32</Format>
</ModifyPartition>
<!-- MSR partition does not need to be modified -->
<ModifyPartition wcm:action="add">
<Order>2</Order>
<PartitionID>2</PartitionID>
</ModifyPartition>
<!-- Windows partition -->
<ModifyPartition wcm:action="add">
<Order>3</Order>
<PartitionID>3</PartitionID>
<Label>Windows</Label>
<Letter>C</Letter>
<Format>NTFS</Format>
</ModifyPartition>
</ModifyPartitions>
</Disk>
</DiskConfiguration>
<ImageInstall>
<OSImage>
<InstallTo>
<DiskID>0</DiskID>
<PartitionID>3</PartitionID>
</InstallTo>
<InstallToAvailablePartition>false</InstallToAvailablePartition>
</OSImage>
</ImageInstall>
<DynamicUpdate>
<Enable>true</Enable>
<WillShowUI>Never</WillShowUI>
</DynamicUpdate>
<UpgradeData>
<Upgrade>false</Upgrade>
<WillShowUI>Never</WillShowUI>
</UpgradeData>
<UserData>
<AcceptEula>true</AcceptEula>
<FullName>Docker</FullName>
<Organization>Windows for Docker</Organization>
</UserData>
<EnableFirewall>false</EnableFirewall>
<Diagnostics>
<OptIn>false</OptIn>
</Diagnostics>
<!-- Registry changes made in the windowsPE pass, executed in <Order>.
     Going by the value names they switch off the TPM, Secure Boot and RAM
     checks of Windows Setup and allow upgrades on an unsupported TPM/CPU
     (inferred from the names; confirm against Microsoft's documentation). -->
<RunSynchronous>
<RunSynchronousCommand wcm:action="add">
<Order>1</Order>
<Path>reg.exe add "HKLM\SYSTEM\Setup\LabConfig" /v BypassTPMCheck /t REG_DWORD /d 1 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>2</Order>
<Path>reg.exe add "HKLM\SYSTEM\Setup\LabConfig" /v BypassSecureBootCheck /t REG_DWORD /d 1 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>3</Order>
<Path>reg.exe add "HKLM\SYSTEM\Setup\LabConfig" /v BypassRAMCheck /t REG_DWORD /d 1 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>4</Order>
<Path>reg.exe add "HKLM\SYSTEM\Setup\MoSetup" /v AllowUpgradesWithUnsupportedTPMOrCPU /t REG_DWORD /d 1 /f</Path>
</RunSynchronousCommand>
</RunSynchronous>
</component>
</settings>
<settings pass="offlineServicing">
<!-- Sets EnableLUA to false in the offline image; presumably this turns off
     User Account Control for the installed system (confirm against the
     Microsoft-Windows-LUA-Settings documentation). -->
<component name="Microsoft-Windows-LUA-Settings" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<EnableLUA>false</EnableLUA>
</component>
</settings>
<settings pass="generalize">
<component name="Microsoft-Windows-PnPSysprep" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<PersistAllDeviceInstalls>true</PersistAllDeviceInstalls>
</component>
<component name="Microsoft-Windows-Security-SPP" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<SkipRearm>1</SkipRearm>
</component>
</settings>
<settings pass="specialize">
<component name="Microsoft-Windows-Security-SPP-UX" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<SkipAutoActivation>true</SkipAutoActivation>
</component>
<component name="Microsoft-Windows-Shell-Setup" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<ComputerName>*</ComputerName>
<OEMInformation>
<Manufacturer>Dockur</Manufacturer>
<Model>Windows for Docker</Model>
<SupportHours>24/7</SupportHours>
<SupportPhone />
<SupportProvider>Dockur</SupportProvider>
<SupportURL>https://github.com/dockur/windows/issues</SupportURL>
</OEMInformation>
<OEMName>Windows for Docker</OEMName>
</component>
<component name="Microsoft-Windows-ErrorReportingCore" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<DisableWER>1</DisableWER>
</component>
<component name="Microsoft-Windows-IE-InternetExplorer" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<DisableAccelerators>true</DisableAccelerators>
<DisableFirstRunWizard>true</DisableFirstRunWizard>
<Home_Page>https://google.com</Home_Page>
<Help_Page>about:blank</Help_Page>
</component>
<component name="Microsoft-Windows-IE-InternetExplorer" processorArchitecture="wow64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<DisableAccelerators>true</DisableAccelerators>
<DisableFirstRunWizard>true</DisableFirstRunWizard>
<Home_Page>https://google.com</Home_Page>
<Help_Page>about:blank</Help_Page>
</component>
<component name="Microsoft-Windows-SQMApi" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<CEIPEnabled>0</CEIPEnabled>
</component>
<component name="Microsoft-Windows-SystemRestore-Main" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<DisableSR>1</DisableSR>
</component>
<component name="Microsoft-Windows-International-Core" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<InputLocale>0409:00000409</InputLocale>
<SystemLocale>en-US</SystemLocale>
<UILanguage>en-US</UILanguage>
<UserLocale>en-US</UserLocale>
</component>
<component name="Microsoft-Windows-Deployment" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<RunSynchronous>
<RunSynchronousCommand wcm:action="add">
<Order>1</Order>
<Path>reg.exe add "HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\OOBE" /v BypassNRO /t REG_DWORD /d 1 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>2</Order>
<Path>reg.exe load "HKU\mount" "C:\Users\Default\NTUSER.DAT"</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>3</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "ContentDeliveryAllowed" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>4</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "FeatureManagementEnabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>5</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "OEMPreInstalledAppsEnabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>6</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "PreInstalledAppsEnabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>7</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "PreInstalledAppsEverEnabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>8</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SilentInstalledAppsEnabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>9</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SoftLandingEnabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>10</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SubscribedContentEnabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>11</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SubscribedContent-310093Enabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>12</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SubscribedContent-338387Enabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>13</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SubscribedContent-338388Enabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>14</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SubscribedContent-338389Enabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>15</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SubscribedContent-338393Enabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>16</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SubscribedContent-353698Enabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>17</Order>
<Path>reg.exe add "HKU\mount\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SystemPaneSuggestionsEnabled" /t REG_DWORD /d 0 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>18</Order>
<Path>reg.exe add "HKU\mount\Software\Policies\Microsoft\Windows\CloudContent" /v "DisableCloudOptimizedContent" /t REG_DWORD /d 1 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>19</Order>
<Path>reg.exe add "HKU\mount\Software\Policies\Microsoft\Windows\CloudContent" /v "DisableWindowsConsumerFeatures" /t REG_DWORD /d 1 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>20</Order>
<Path>reg.exe add "HKU\mount\Software\Policies\Microsoft\Windows\CloudContent" /v "DisableConsumerAccountStateContent" /t REG_DWORD /d 1 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>21</Order>
<Path>reg.exe unload "HKU\mount"</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>22</Order>
<Path>reg.exe add "HKLM\Software\Policies\Microsoft\Windows\CloudContent" /v "DisableCloudOptimizedContent" /t REG_DWORD /d 1 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>23</Order>
<Path>reg.exe add "HKLM\Software\Policies\Microsoft\Windows\CloudContent" /v "DisableWindowsConsumerFeatures" /t REG_DWORD /d 1 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>24</Order>
<Path>reg.exe add "HKLM\Software\Policies\Microsoft\Windows\CloudContent" /v "DisableConsumerAccountStateContent" /t REG_DWORD /d 1 /f</Path>
</RunSynchronousCommand>
<RunSynchronousCommand wcm:action="add">
<Order>25</Order>
<Path>reg.exe add "HKLM\SOFTWARE\Policies\Microsoft\Windows NT\CurrentVersion\NetworkList\Signatures\FirstNetwork" /v Category /t REG_DWORD /d 1 /f</Path>
<Description>Set Network Location to Home</Description>
</RunSynchronousCommand>
</RunSynchronous>
</component>
<component name="Microsoft-Windows-TerminalServices-LocalSessionManager" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<fDenyTSConnections>false</fDenyTSConnections>
</component>
<component name="Microsoft-Windows-TerminalServices-RDP-WinStationExtensions" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<UserAuthentication>0</UserAuthentication>
</component>
<component name="Networking-MPSSVC-Svc" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<FirewallGroups>
<FirewallGroup wcm:action="add" wcm:keyValue="RemoteDesktop">
<Active>true</Active>
<Profile>all</Profile>
<Group>@FirewallAPI.dll,-28752</Group>
</FirewallGroup>
</FirewallGroups>
</component>
</settings>
<settings pass="auditSystem" />
<settings pass="auditUser" />
<settings pass="oobeSystem">
<component name="Microsoft-Windows-SecureStartup-FilterDriver" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<PreventDeviceEncryption>true</PreventDeviceEncryption>
</component>
<component name="Microsoft-Windows-EnhancedStorage-Adm" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<TCGSecurityActivationDisabled>1</TCGSecurityActivationDisabled>
</component>
<component name="Microsoft-Windows-Shell-Setup" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS">
<UserAccounts>
<!-- Local account "Docker" in the Administrators group, created with an
     empty password. -->
<LocalAccounts>
<LocalAccount wcm:action="add">
<Name>Docker</Name>
<Group>Administrators</Group>
<Password>
<Value />
<PlainText>true</PlainText>
</Password>
</LocalAccount>
</LocalAccounts>
<!-- NOTE(review): the Administrator password is a fixed value stored in
     plain text in this file. -->
<AdministratorPassword>
<Value>password</Value>
<PlainText>true</PlainText>
</AdministratorPassword>
</UserAccounts>
<AutoLogon>
<Username>Docker</Username>
<Enabled>true</Enabled>
<LogonCount>65432</LogonCount>
<Password>
<Value />
<PlainText>true</PlainText>
</Password>
</AutoLogon>
<Display>
<ColorDepth>32</ColorDepth>
<HorizontalResolution>1920</HorizontalResolution>
<VerticalResolution>1080</VerticalResolution>
</Display>
<OOBE>
<HideEULAPage>true</HideEULAPage>
<HideLocalAccountScreen>true</HideLocalAccountScreen>
<HideOEMRegistrationScreen>true</HideOEMRegistrationScreen>
<HideOnlineAccountScreens>true</HideOnlineAccountScreens>
<HideWirelessSetupInOOBE>true</HideWirelessSetupInOOBE>
<NetworkLocation>Home</NetworkLocation>
<ProtectYourPC>3</ProtectYourPC>
<SkipUserOOBE>true</SkipUserOOBE>
<SkipMachineOOBE>true</SkipMachineOOBE>
</OOBE>
<RegisteredOrganization>Dockur</RegisteredOrganization>
<RegisteredOwner>Windows for Docker</RegisteredOwner>
<FirstLogonCommands>
<SynchronousCommand wcm:action="add">
<Order>1</Order>
<CommandLine>reg.exe add "HKLM\SYSTEM\CurrentControlSet\Services\LanmanWorkstation\Parameters" /v "AllowInsecureGuestAuth" /t REG_DWORD /d 1 /f</CommandLine>
<Description>Allow guest access to network shares</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>2</Order>
<CommandLine>reg.exe add "HKLM\SYSTEM\CurrentControlSet\Services\LanmanWorkstation\Parameters" /v "RequireSecuritySignature" /t REG_DWORD /d 0 /f</CommandLine>
<Description>Disable SMB signing requirement</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>3</Order>
<CommandLine>reg.exe add "HKLM\SYSTEM\CurrentControlSet\Control\Lsa" /v LimitBlankPasswordUse /t REG_DWORD /d 0 /f</CommandLine>
<Description>Allow RDP login with blank password</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>4</Order>
<CommandLine>reg.exe add "HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\PasswordLess\Device" /v "DevicePasswordLessBuildVersion" /t REG_DWORD /d 0 /f</CommandLine>
<Description>Enable option for passwordless sign-in</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>5</Order>
<CommandLine>cmd /C wmic useraccount where name="Docker" set PasswordExpires=false</CommandLine>
<Description>Password Never Expires</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>6</Order>
<CommandLine>cmd /C POWERCFG -H OFF</CommandLine>
<Description>Disable Hibernation</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>7</Order>
<CommandLine>cmd /C POWERCFG -X -monitor-timeout-ac 0</CommandLine>
<Description>Disable monitor blanking</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>8</Order>
<CommandLine>reg.exe add "HKLM\SOFTWARE\Policies\Microsoft\Edge" /v "HideFirstRunExperience" /t REG_DWORD /d 1 /f</CommandLine>
<Description>Disable first-run experience in Edge</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>9</Order>
<CommandLine>reg.exe add "HKCU\SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Advanced" /v "HideFileExt" /t REG_DWORD /d 0 /f</CommandLine>
<Description>Show file extensions in Explorer</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>10</Order>
<CommandLine>reg.exe add "HKLM\SYSTEM\CurrentControlSet\Control\Power" /v "HibernateFileSizePercent" /t REG_DWORD /d 0 /f</CommandLine>
<Description>Zero Hibernation File</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>11</Order>
<CommandLine>reg.exe add "HKLM\SYSTEM\CurrentControlSet\Control\Power" /v "HibernateEnabled" /t REG_DWORD /d 0 /f</CommandLine>
<Description>Disable Hibernation</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>12</Order>
<CommandLine>cmd /C POWERCFG -X -standby-timeout-ac 0</CommandLine>
<Description>Disable Sleep</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>13</Order>
<CommandLine>reg.exe add "HKLM\SOFTWARE\Policies\Microsoft\Windows NT\Terminal Services" /v "fAllowUnlistedRemotePrograms" /t REG_DWORD /d 1 /f</CommandLine>
<Description>Enable RemoteAPP to launch unlisted programs</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>14</Order>
<CommandLine>reg.exe add "HKCU\SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Advanced" /v "ShowTaskViewButton" /t REG_DWORD /d 0 /f</CommandLine>
<Description>Remove Task View from the Taskbar</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>15</Order>
<CommandLine>reg.exe add "HKCU\SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Advanced" /v "TaskbarDa" /t REG_DWORD /d 0 /f</CommandLine>
<Description>Remove Widgets from the Taskbar</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>16</Order>
<CommandLine>reg.exe add "HKCU\SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Advanced" /v "TaskbarMn" /t REG_DWORD /d 0 /f</CommandLine>
<Description>Remove Chat from the Taskbar</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>17</Order>
<CommandLine>reg.exe add "HKLM\SOFTWARE\Policies\Microsoft\Windows\WindowsUpdate\AU" /v "NoAutoUpdate" /t REG_DWORD /d 1 /f</CommandLine>
<Description>Turn off Windows Update auto download</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>18</Order>
<CommandLine>netsh advfirewall firewall set rule group="@FirewallAPI.dll,-32752" new enable=Yes</CommandLine>
<Description>Enable Network Discovery</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>19</Order>
<CommandLine>netsh advfirewall firewall set rule group="@FirewallAPI.dll,-28502" new enable=Yes</CommandLine>
<Description>Enable File Sharing</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>20</Order>
<CommandLine>reg.exe add "HKCU\Control Panel\UnsupportedHardwareNotificationCache" /v SV1 /d 0 /t REG_DWORD /f</CommandLine>
<Description>Disable unsupported hardware notifications</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>21</Order>
<CommandLine>reg.exe add "HKCU\Control Panel\UnsupportedHardwareNotificationCache" /v SV2 /d 0 /t REG_DWORD /f</CommandLine>
<Description>Disable unsupported hardware notifications</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>22</Order>
<CommandLine>pnputil -i -a C:\Windows\Drivers\viogpudo\viogpudo.inf</CommandLine>
<Description>Install VirtIO display driver</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>23</Order>
<CommandLine>cmd /C rd /q C:\Windows.old</CommandLine>
<Description>Remove empty Windows.old folder</Description>
</SynchronousCommand>
<SynchronousCommand wcm:action="add">
<Order>24</Order>
<CommandLine>cmd /C if exist "C:\OEM\install.bat" start "Install" "cmd /C C:\OEM\install.bat"</CommandLine>
<Description>Execute custom script from the OEM folder if exists</Description>
</SynchronousCommand>
</FirstLogonCommands>
</component>
</settings>
</unattend>
\ No newline at end of file
Add your Win11E setup.iso to this folder
\ No newline at end of file
@echo off

rem Shared folder holding the setup scripts, and the log file written there
set "ScriptFolder=\\host.lan\Data"
set "LogFile=%ScriptFolder%\firstboot_log.txt"

echo Running PowerShell script... > %LogFile%

rem Check for PowerShell availability
where powershell >> %LogFile% 2>&1
if not "%ERRORLEVEL%"=="0" (
    echo PowerShell is not available! >> %LogFile%
    echo PowerShell is not available!
    exit /b 1
)

rem Add a 30-second delay
echo Waiting for 30 seconds before continuing... >> %LogFile%
timeout /t 30 /nobreak >> %LogFile% 2>&1

rem Run PowerShell script with ExecutionPolicy Bypass and log errors
echo Running setup.ps1... >> %LogFile%
powershell -ExecutionPolicy Bypass -File "%ScriptFolder%\setup.ps1" >> %LogFile% 2>&1

if not "%ERRORLEVEL%"=="0" (
    echo An error occurred. See %LogFile% for details.
) else (
    echo PowerShell script has completed successfully.
)

echo PowerShell script has completed.
\ No newline at end of file
# Location of the server script on the shared folder and the port it listens on
$scriptFolder = "\\host.lan\Data"
$pythonServerPort = 5000
$pythonScriptFile = '{0}\server\main.py' -f $scriptFolder

# Start the flask computer use server
Write-Host "Running the server on port $pythonServerPort"
& python $pythonScriptFile --port $pythonServerPort
import os
import logging
import argparse
import shlex
import subprocess
from flask import Flask, request, jsonify, send_file
import threading
import traceback
import pyautogui
from PIL import Image
from io import BytesIO
# Command-line options: log file location (defaults to server.log next to this
# script) and the port to listen on.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--log_file",
    type=str,
    default=os.path.join(os.path.dirname(__file__), "server.log"),
    help="log file path",
)
parser.add_argument("--port", type=int, default=5000, help="port")
args = parser.parse_args()

# Log at DEBUG level; filemode 'w' truncates the log file on every start.
logging.basicConfig(filename=args.log_file, filemode='w', level=logging.DEBUG)
logger = logging.getLogger('werkzeug')

app = Flask(__name__)

# Held while a command runs so that only one executes at a time.
computer_control_lock = threading.Lock()
@app.route('/probe', methods=['GET'])
def probe_endpoint():
    """Health check: always answers HTTP 200 with a fixed JSON payload."""
    payload = {"status": "Probe successful", "message": "Service is operational"}
    return jsonify(payload), 200
@app.route('/execute', methods=['POST'])
def execute_command():
    """Run the command described by the JSON request body and report the result.

    Request JSON object:
        command: argument list, or a string. A string is split with
            ``shlex.split`` unless ``shell`` is true.
        shell: when true, ``command`` is passed to the system shell unchanged.

    Returns:
        On completion, a JSON object with ``status``, ``output`` (stdout),
        ``error`` (stderr) and ``returncode``.
        HTTP 400 with ``status: error`` when the body is not a JSON object.
        HTTP 500 with ``status: error`` when the command cannot be parsed or
        run, including when it exceeds the 120 second timeout.
    """
    # Only execute one command at a time
    with computer_control_lock:
        data = request.json
        # A JSON body such as `null` or a list has no .get(); reject it cleanly
        if not isinstance(data, dict):
            return jsonify({
                'status': 'error',
                'message': 'Request body must be a JSON object'
            }), 400

        # The 'command' key in the JSON request should contain the command to be executed.
        shell = data.get('shell', False)
        command = data.get('command', "" if shell else [])

        # Execute the command without any safety checks.
        try:
            if isinstance(command, str) and not shell:
                command = shlex.split(command)

            # Expand user directory. Only argument lists are touched: a shell
            # string is left for the shell itself to interpret.
            if isinstance(command, list):
                command = [
                    os.path.expanduser(arg)
                    if isinstance(arg, str) and arg.startswith("~/") else arg
                    for arg in command
                ]

            result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True, timeout=120)
            return jsonify({
                'status': 'success',
                'output': result.stdout,
                'error': result.stderr,
                'returncode': result.returncode
            })
        except Exception as e:
            logger.error("\n" + traceback.format_exc() + "\n")
            return jsonify({
                'status': 'error',
                'message': str(e)
            }), 500
@app.route('/screenshot', methods=['GET'])
def capture_screen_with_cursor():
    """Return a PNG screenshot with a cursor image pasted at the pointer position.

    The cursor image is read from ``cursor.png`` next to this script and
    scaled down by a factor of 1.5 before it is pasted.
    """
    pointer_file = os.path.join(os.path.dirname(__file__), "cursor.png")

    frame = pyautogui.screenshot()
    pointer_x, pointer_y = pyautogui.position()

    # make the cursor smaller
    pointer = Image.open(pointer_file)
    scaled_size = (int(pointer.width / 1.5), int(pointer.height / 1.5))
    pointer = pointer.resize(scaled_size)

    # The cursor image is also passed as the paste mask
    frame.paste(pointer, (pointer_x, pointer_y), pointer)

    # Convert PIL Image to bytes and send
    buffer = BytesIO()
    frame.save(buffer, 'PNG')
    buffer.seek(0)
    return send_file(buffer, mimetype='image/png')
if __name__ == '__main__':
    # Listens on all interfaces on the configured port, with Flask debug mode on.
    app.run(host="0.0.0.0", port=args.port, debug=True)
\ No newline at end of file
# Parses the tools configuration.
#   toolsConfigJson - JSON text describing the tools
# Returns the object produced by ConvertFrom-Json.
function Get-Tools {
    param(
        [string]$toolsConfigJson
    )

    # Convert the JSON string to a PowerShell object
    $parsedTools = $toolsConfigJson | ConvertFrom-Json

    $parsedTools
}
# Looks up one entry of the tools object.
#   toolsList - object whose properties are the known tools
#   toolName  - property name to look up
# Returns the property value, or $null (after a console message) when the
# object has no property of that name.
function Get-ToolDetails {
    param(
        $toolsList,
        [string]$toolName
    )

    $knownNames = $toolsList.PSObject.Properties.Name

    # Handle the case where the program is not found
    if ($knownNames -notcontains $toolName) {
        Write-Host "Program '$toolName' not found in the list."
        return $null
    }

    # Return the program details as a PowerShell object
    return $toolsList.$toolName
}
function Invoke-DownloadFileFromAvailableMirrors {
    <#
    .SYNOPSIS
    Tries each mirror URL in order until one download succeeds.
    .PARAMETER mirrorUrls
    Candidate URLs, tried in the order given.
    .PARAMETER outfile
    Destination path handed to Invoke-DownloadFile.
    .OUTPUTS
    $true as soon as one mirror works, $false when all of them fail.
    #>
    param (
        [string[]]$mirrorUrls,
        [string]$outfile
    )

    foreach ($mirror in $mirrorUrls) {
        try {
            $succeeded = Invoke-DownloadFile -url $mirror -outfile $outfile
            if (-not ($succeeded -eq $true)) {
                # This mirror did not deliver; move on to the next one
                continue
            }
            Write-Host "Downloaded using $mirror"
            return $true
        } catch {
            Write-Host "Error downloading from $mirror. Please check and update the mirrors."
        }
    }

    # Every mirror was tried without success
    Write-Host "Downloading from the provided mirrors failed. Please check and update the mirrors."
    return $false
}
function Invoke-DownloadFile {
    <#
    .SYNOPSIS
    Downloads a URL to a file, retrying with capped exponential backoff.
    .PARAMETER url
    Address to download from.
    .PARAMETER outfile
    Destination file path. Its parent directory is created when missing.
    .OUTPUTS
    $true when the download succeeded, $false after three failed attempts.
    #>
    param (
        [string]$url,
        [string]$outfile
    )

    # Makes download faster by disabling progress bar
    $ProgressPreference = "SilentlyContinue"
    $retryCount = 0
    $maxRetries = 3
    $sleepSeconds = 2
    $maxSleepSeconds = 10
    $userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"

    # Ensure directory exists. Split-Path yields an empty string for a bare
    # file name, and Test-Path rejects an empty -Path, so skip that case.
    $directory = Split-Path -Path $outfile -Parent
    if ((-not [string]::IsNullOrEmpty($directory)) -and (-not (Test-Path -Path $directory))) {
        Write-Host "Creating directory $directory..."
        New-Item -Path $directory -ItemType Directory -Force | Out-Null
    }

    while ($retryCount -lt $maxRetries) {
        try {
            # -ErrorAction Stop routes every failure into the catch block so it is retried
            Invoke-RestMethod -Uri $url -OutFile $outfile -Headers @{"User-Agent" = $userAgent} -ErrorAction Stop
            Write-Host "Download successful, file saved to: $outfile"
            return $true
        } catch {
            $retryCount++
            Write-Host "Attempt $retryCount of $maxRetries failed. Error: $($_.Exception.Message)"
            # Only wait when another attempt will follow
            if ($retryCount -lt $maxRetries) {
                Start-Sleep -Seconds $sleepSeconds
                $sleepSeconds = [Math]::Min($sleepSeconds * 2, $maxSleepSeconds) # Exponential backoff with a cap
            }
        }
    }

    Write-Host "Failed to download the file after $maxRetries attempts."
    return $false
}
function Add-ToEnvPath {
    <#
    .SYNOPSIS
    Adds a directory to the machine-wide PATH and to the current session's PATH.
    .DESCRIPTION
    The directory is appended only where it is not already listed, so running
    the setup repeatedly does not keep growing PATH. Comparison ignores case
    and a trailing backslash.
    .PARAMETER NewPath
    Directory to add. Empty or whitespace-only values are ignored.
    #>
    param (
        [string]$NewPath
    )

    if ([string]::IsNullOrWhiteSpace($NewPath)) {
        return
    }
    $wantedEntry = $NewPath.TrimEnd('\')

    # NOTE: PowerShell variable names are case-insensitive, so no local may be
    # called $newPath - it would overwrite the $NewPath parameter.

    # Machine-wide PATH (persisted)
    $machinePath = [Environment]::GetEnvironmentVariable("PATH", "Machine")
    $machineEntries = @("$machinePath" -split ';' | Where-Object { $_ })
    $inMachinePath = @($machineEntries | Where-Object { $_.TrimEnd('\') -ieq $wantedEntry }).Count -gt 0
    if (-not $inMachinePath) {
        if ([string]::IsNullOrEmpty($machinePath)) {
            $updatedMachinePath = $NewPath
        } else {
            $updatedMachinePath = "$($machinePath.TrimEnd(';'));$NewPath"
        }
        [Environment]::SetEnvironmentVariable("PATH", $updatedMachinePath, "Machine")
    }

    # Current session PATH, so the tool is usable without reopening the shell.
    # Only the new directory is appended, not the whole machine PATH.
    $sessionPath = "$env:PATH"
    $sessionEntries = @($sessionPath -split ';' | Where-Object { $_ })
    $inSessionPath = @($sessionEntries | Where-Object { $_.TrimEnd('\') -ieq $wantedEntry }).Count -gt 0
    if (-not $inSessionPath) {
        if ([string]::IsNullOrEmpty($sessionPath)) {
            $env:PATH = $NewPath
        } else {
            $env:PATH = "$($sessionPath.TrimEnd(';'));$NewPath"
        }
    }
}
function Register-LogonTask {
    <#
    .SYNOPSIS
    Registers a scheduled task that runs a script through powershell.exe at logon.
    .DESCRIPTION
    The task dot-sources ScriptPath with Arguments from a hidden PowerShell
    window, using the script's folder as working directory and the highest
    run level. An exception message from the script is written to
    "<TaskName>_Log.txt". An existing task of the same name is replaced (Force).
    The task runs as SYSTEM when -AsSystem is given, otherwise as LocalUser
    (with LocalPassword when one is supplied).
    NOTE: LogFilePath is accepted but not used by this function.
    #>
    param(
        [parameter(Mandatory = $true, ValueFromPipelineByPropertyName = $true, HelpMessage = "Name of the scheduled task")]
        [string]
        $TaskName,
        [parameter(Mandatory = $true, ValueFromPipelineByPropertyName = $true, HelpMessage = "Path to the .py script")]
        [string]
        $ScriptPath,
        [parameter(Mandatory = $false, ValueFromPipelineByPropertyName = $true, HelpMessage = "Arguments to the .py script")]
        [string]
        $Arguments = "",
        [parameter(Mandatory = $false, ValueFromPipelineByPropertyName = $true, HelpMessage = "Local Account username")]
        [string]
        $LocalUser,
        [parameter(Mandatory = $false, ValueFromPipelineByPropertyName = $true, HelpMessage = "Local Account password")]
        [string]
        $LocalPassword,
        [parameter(Mandatory = $false, ValueFromPipelineByPropertyName = $true, HelpMessage = "Whether to execute the command as SYSTEM")]
        [switch]
        $AsSystem = $false,
        [parameter(Mandatory = $false, ValueFromPipelineByPropertyName = $true, HelpMessage = "logging file")]
        [string]
        $LogFilePath
    )

    # Command line handed to powershell.exe by the task
    $taskActionArgument = "-ExecutionPolicy Bypass -windowstyle hidden -Command `"try { . '$ScriptPath' $Arguments } catch { Write `$_.Exception.Message | Out-File $($TaskName)_Log.txt } finally { } `""
    $workingDirectory = Split-Path $ScriptPath
    $taskAction = New-ScheduledTaskAction -Execute "$PSHome\powershell.exe" -Argument $taskActionArgument -WorkingDirectory $workingDirectory

    # Everything that is the same regardless of the account the task runs under
    $registration = @{
        TaskName = $TaskName
        Action   = $taskAction
        Trigger  = (New-ScheduledTaskTrigger -AtLogOn)
        RunLevel = "Highest"
        Force    = $True
    }

    # Account selection
    if ($AsSystem) {
        $registration["User"] = "NT AUTHORITY\SYSTEM"
    }
    else {
        $registration["User"] = $LocalUser
        if ($LocalPassword) {
            $registration["Password"] = $LocalPassword
        }
    }

    Write-Host "Registering scheduled task '$TaskName' to run 'powershell.exe $taskActionArgument'..."
    Register-ScheduledTask @registration
}
# Function to attempt pip install and handle failures
function Install-PythonPackages {
    <#
    .SYNOPSIS
    Runs "python -m pip install" with up to three attempts.
    .PARAMETER Package
    Package to install. Used when RequirementsPath is empty.
    .PARAMETER Arguments
    Extra pip options as one whitespace-separated string, e.g. "--upgrade".
    .PARAMETER RequirementsPath
    Path to a requirements file. When given, it is installed with "-r"
    instead of Package.
    .NOTES
    Waits 10 seconds between attempts. After the last failure an error is
    written and the session is ended with "exit".
    #>
    param (
        [string]$Package = "",
        [string]$Arguments = "",
        [string]$RequirementsPath = ""
    )

    $RetryCount = 3

    # Split the option string into separate tokens. Passing it as one string
    # would hand pip a single argument for multi-option values, and an empty
    # string would become an empty argument on newer PowerShell versions.
    $extraArguments = @()
    if (-not [string]::IsNullOrWhiteSpace($Arguments)) {
        $extraArguments = @($Arguments -split '\s+' | Where-Object { $_ })
    }

    # What to install: a requirements file takes precedence over a package name
    if (-not [string]::IsNullOrWhiteSpace($RequirementsPath)) {
        $installTarget = @("-r", $RequirementsPath)
    } else {
        $installTarget = @($Package)
    }

    for ($attempt = 1; $attempt -le $RetryCount; $attempt++) {
        & python -m pip install --no-cache-dir @installTarget @extraArguments

        if ($LASTEXITCODE -eq 0) {
            Write-Host "Installation successful."
            return
        }

        Write-Host "Attempt $attempt failed. Retrying..."
        Start-Sleep -Seconds 10
    }

    Write-Error "Failed to install after $RetryCount attempts."
    exit
}
\ No newline at end of file
$ErrorActionPreference = "Continue" # until downloading from mirrors is more stable

# Section - General Setup
# Folder the setup files are read from, and the per-user folder for standalone executables
$scriptFolder = "\\host.lan\Data"
$toolsFolder = "C:\Users\$env:USERNAME\Tools"

# Load the shared setup-tools module
$setupToolsModulePath = Join-Path $scriptFolder -ChildPath "setup-tools.psm1"
Import-Module $setupToolsModulePath

# Make sure a PowerShell profile file exists; it is created empty when missing
$profileExists = Test-Path $PROFILE
if (-not $profileExists) {
    New-Item -ItemType File -Path $PROFILE -Force
}

# On first run, create the standalone-executables folder and append it to the machine PATH
$toolsFolderExists = Test-Path $toolsFolder
if (-not $toolsFolderExists) {
    New-Item -ItemType Directory -Path $toolsFolder -Force
    $envPath = [Environment]::GetEnvironmentVariable("PATH", "Machine")
    [Environment]::SetEnvironmentVariable("PATH", "$envPath;$toolsFolder", "Machine")
}
# Section - Tools Installation
# Set TLS version to 1.2 or higher. TLS 1.3 is requested only when the running
# .NET runtime defines it; referencing it unconditionally fails on runtimes
# without that enum member and would leave the protocol unset.
$securityProtocol = [Net.SecurityProtocolType]::Tls12
if ([Enum]::GetNames([Net.SecurityProtocolType]) -contains "Tls13") {
    $securityProtocol = $securityProtocol -bor [Net.SecurityProtocolType]"Tls13"
}
try {
    [Net.ServicePointManager]::SecurityProtocol = $securityProtocol
} catch {
    # The runtime knows TLS 1.3 but refused the combination; use TLS 1.2 alone
    [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
}

# Load the tools config json listing mirrors and aliases used for installing tools
$toolsConfigJsonPath = Join-Path $scriptFolder -ChildPath "tools_config.json"
$toolsConfigJson = Get-Content -Path $toolsConfigJsonPath -Raw
$toolsList = Get-Tools -toolsConfigJson $toolsConfigJson
## - Python
$pythonToolName = "Python"
# Version announced in the log and used to derive the per-user install folder
# (3.10 -> "Python310"); it was previously referenced without being defined.
$pythonVersion = "3.10"
$userPythonPath = "$env:LOCALAPPDATA\Programs\Python"
$pythonDetails = Get-ToolDetails -toolsList $toolsList -toolName $pythonToolName
$pythonAlias = $pythonDetails.alias

# Look for an existing python.exe; this value is kept only if the download below fails
$pythonExecutablePath = Get-ChildItem -Path $userPythonPath -Filter python.exe -Recurse -ErrorAction SilentlyContinue | Select-Object -First 1 -ExpandProperty FullName

# Force to install Python 3.10 as the pre-installed version on Windows may not work sometimes
Write-Host "Downloading Python $pythonVersion..."
$pythonInstallerFilePath = "$env:TEMP\python_installer.exe"
$downloadResult = Invoke-DownloadFileFromAvailableMirrors -mirrorUrls $pythonDetails.mirrors -outfile $pythonInstallerFilePath
if (-not $downloadResult) {
    Write-Host "Failed to download Python. Please try again later or install manually."
} else {
    Write-Host "Installing Python for current user..."
    Start-Process -FilePath $pythonInstallerFilePath -Args "/quiet InstallAllUsers=0 PrependPath=0" -NoNewWindow -Wait
    $pythonExecutablePath = "$userPythonPath\Python$($pythonVersion -replace '\.', '')\python.exe"

    # PATH is not modified by the installer (PrependPath=0); expose Python through
    # an alias, both persisted in the profile and active in this session
    $setAliasExpression = "Set-Alias -Name $pythonAlias -Value `"$pythonExecutablePath`""
    Add-Content -Path $PROFILE -Value $setAliasExpression
    Invoke-Expression $setAliasExpression
}
## - Git
$gitToolName = "git"
$gitToolDetails = Get-ToolDetails -toolsList $toolsList -toolName $gitToolName

# Probe for Git by running it; when the command cannot be run, the catch block installs it
try {
    git --version | Out-Null
    Write-Host "Git is already installed."
} catch {
    Write-Host "Git is not installed. Downloading and installing Git..."
    $gitInstallerFilePath = "$env:TEMP\git_installer.exe"
    $downloadResult = Invoke-DownloadFileFromAvailableMirrors -mirrorUrls $gitToolDetails.mirrors -outfile $gitInstallerFilePath

    if ($downloadResult) {
        # Unattended install, then make git reachable through PATH
        Start-Process -FilePath $gitInstallerFilePath -Args "/VERYSILENT /NORESTART /NOCANCEL /SP-" -Wait
        Add-ToEnvPath -NewPath "C:\Program Files\Git\bin"
        Write-Host "Git has been installed."
    } else {
        Write-Host "Failed to download Git. Please try again later or install manually."
    }
}
# - 7zip
$7ZipToolName = "7zip"
$7ZipToolDetails = Get-ToolDetails -toolsList $toolsList -toolName $7ZipToolName
Write-Host "$7ZipToolDetails"

$existing7ZipCommand = Get-Command 7z -ErrorAction SilentlyContinue
if ($existing7ZipCommand) {
    Write-Host "7-Zip is already installed."
}
else {
    Write-Host "Installing 7-Zip..."
    $7ZipInstallerFilePath = "$env:TEMP\7_zip.exe"
    Write-Host "$($7ZipToolDetails.mirrors)"
    $downloadResult = Invoke-DownloadFileFromAvailableMirrors -mirrorUrls $7ZipToolDetails.mirrors -outfile $7ZipInstallerFilePath

    if ($downloadResult) {
        # Silent, elevated install; the installer is deleted afterwards
        Start-Process -FilePath $7ZipInstallerFilePath -Args "/S" -Verb RunAs -Wait
        Remove-Item $7ZipInstallerFilePath
        # add 7z to PATH
        Add-ToEnvPath -NewPath "${env:ProgramFiles}\7-Zip"
    } else {
        Write-Host "Failed to download 7-Zip. Please try again later or install manually."
    }
}
# - ffmpeg
$ffmpegToolName = "ffmpeg"
$ffmpegToolDetails = Get-ToolDetails -toolsList $toolsList -toolName $ffmpegToolName
if (Get-Command ffmpeg -ErrorAction SilentlyContinue) {
    Write-Host "ffmpeg is already installed."
} else {
    Write-Host "ffmpeg is not installed. Installing it."
    $ffmpegArchivePath = "C:\ffmpeg.7z"
    $downloadResult = Invoke-DownloadFileFromAvailableMirrors -mirrorUrls $ffmpegToolDetails.mirrors -outfile $ffmpegArchivePath
    if (-not $downloadResult) {
        Write-Host "Failed to download ffmpeg. Please try again later or install manually."
    } else {
        Write-Host "Extracting $ffmpegArchivePath..."
        7z x -y -o"C:\" $ffmpegArchivePath

        # The archive unpacks into a versioned "ffmpeg-*" folder. Take its full
        # path directly: joining "C:\" with the directory object breaks when the
        # object stringifies to a full path or when several folders match.
        # With several matches the most recently written folder is used.
        $extractedFolder = Get-ChildItem -Path "C:\" -Filter "ffmpeg-*" -Directory |
            Sort-Object -Property LastWriteTime -Descending |
            Select-Object -First 1 -ExpandProperty FullName

        if (-not $extractedFolder) {
            Write-Host "Could not find the extracted ffmpeg folder under C:\."
        } else {
            #remove ffmpeg folder if exists
            if (Test-Path "C:\ffmpeg") {
                Remove-Item -Path "C:\ffmpeg" -Recurse -Force
            }
            Rename-Item -Path $extractedFolder -NewName "ffmpeg"
            Write-Host "Adding ffmpeg to PATH..."
            Add-ToEnvPath -NewPath "C:\ffmpeg\bin"
            Write-Host "ffmpeg is installed"
        }
    }
}
# Disable Edge Auto Updates
# Best effort: both steps ignore errors, and the message below is printed either way.
$edgeUpdatePath = "${env:ProgramFiles(x86)}\Microsoft\EdgeUpdate"
Stop-Process -Name "MicrosoftEdgeUpdate" -Force -ErrorAction SilentlyContinue
Remove-Item -Path $edgeUpdatePath -Recurse -Force -ErrorAction SilentlyContinue
Write-Host "Edge Update processes terminated and directory removed."
# - Google Chrome
$chromeToolName = "Google Chrome"
$chromeToolDetails = Get-ToolDetails -toolsList $toolsList -toolName $chromeToolName
$chromeExePath = "C:\Program Files\Google\Chrome\Application\chrome.exe"
$chromeAlias = $chromeToolDetails.alias

# Chrome counts as installed when its alias resolves to a command
$chromeCommand = Get-Command $chromeAlias -ErrorAction SilentlyContinue
if ($chromeCommand) {
    Write-Host "Google Chrome is already installed."
} else {
    # Download the installer to the Temp directory
    $chromeInstallerFilePath = "$env:TEMP\chrome_installer.exe"
    $downloadResult = Invoke-DownloadFileFromAvailableMirrors -mirrorUrls $chromeToolDetails.mirrors -outfile $chromeInstallerFilePath

    if ($downloadResult) {
        # Execute the installer silently with elevated permissions, then delete it
        Start-Process -FilePath $chromeInstallerFilePath -ArgumentList "/silent", "/install" -Verb RunAs -Wait
        Remove-Item -Path $chromeInstallerFilePath

        # Register the alias in the profile and in the running session
        $setAliasExpression = "Set-Alias -Name $chromeAlias -Value `"$chromeExePath`""
        Add-Content -Path $PROFILE -Value $setAliasExpression
        Invoke-Expression $setAliasExpression

        # Add Chrome to the system PATH environment variable
        Add-ToEnvPath -NewPath "${env:ProgramFiles}\Google\Chrome\Application"

        # Disable Google Chrome Auto Updates: both policy values are set to 0
        $chromeRegPath = "HKLM:\SOFTWARE\Policies\Google\Update"
        if (-not (Test-Path $chromeRegPath)) {
            New-Item -Path $chromeRegPath -Force
        }
        foreach ($policyName in "AutoUpdateCheckPeriodMinutes", "UpdateDefault") {
            Set-ItemProperty -Path $chromeRegPath -Name $policyName -Value 0
        }
    } else {
        Write-Host "Failed to download Google Chrome. Please try again later or install manually."
    }
}
# - LibreOffice
$libreOfficeToolName = "LibreOffice"
$libreOfficeToolDetails = Get-ToolDetails -toolsList $toolsList -toolName $libreOfficeToolName

# Check for LibreOffice installation by querying the installed products
$installedVersion = (Get-WmiObject -Query "SELECT * FROM Win32_Product WHERE Name like 'LibreOffice%'").Version
if (-not [string]::IsNullOrWhiteSpace($installedVersion)) {
    # Report the version that was actually found ($version was never defined)
    Write-Host "LibreOffice $installedVersion is already installed."
} else {
    Write-Host "LibreOffice is not installed. Downloading and installing LibreOffice..."
    $libreOfficeInstallerFilePath = "$env:TEMP\libreOffice_installer.exe"
    $downloadResult = Invoke-DownloadFileFromAvailableMirrors -mirrorUrls $libreOfficeToolDetails.mirrors -outfile $libreOfficeInstallerFilePath
    if (-not $downloadResult) {
        Write-Host "Failed to download LibreOffice. Please try again later or install manually."
    } else {
        # The download is handed to msiexec for a quiet install
        Start-Process "msiexec.exe" -ArgumentList "/i `"$libreOfficeInstallerFilePath`" /quiet" -Wait -NoNewWindow
        Write-Host "LibreOffice has been installed."
        # Add LibreOffice to the system PATH environment variable
        Add-ToEnvPath -NewPath "C:\Program Files\LibreOffice\program"
    }
}
# - VLC
$vlcToolName = "VLC"
$vlcToolDetails = Get-ToolDetails -toolsList $toolsList -toolName $vlcToolName
$vlcAlias = $vlcToolDetails.alias
$vlcExecutableFilePath = "C:\Program Files\VideoLAN\VLC\vlc.exe"

# VLC counts as installed when its executable exists at the expected path
$vlcAlreadyPresent = Test-Path $vlcExecutableFilePath
if ($vlcAlreadyPresent) {
    Write-Host "VLC is already installed."
} else {
    # Download the installer to the Temp directory
    $vlcInstallerFilePath = "$env:TEMP\vlc_installer.exe"
    $downloadResult = Invoke-DownloadFileFromAvailableMirrors -mirrorUrls $vlcToolDetails.mirrors -outfile $vlcInstallerFilePath

    if ($downloadResult) {
        # Execute the installer silently with elevated permissions, then delete it
        Start-Process -FilePath $vlcInstallerFilePath -ArgumentList "/S" -Verb RunAs -Wait
        Remove-Item -Path $vlcInstallerFilePath

        # Register the alias in the profile and in the running session
        $setAliasExpression = "Set-Alias -Name $vlcAlias -Value `"$vlcExecutableFilePath`""
        Add-Content -Path $PROFILE -Value $setAliasExpression
        Invoke-Expression $setAliasExpression

        # Add VLC to the system PATH environment variable
        Add-ToEnvPath -NewPath "C:\Program Files\VideoLAN\VLC"
    } else {
        Write-Host "Failed to download VLC. Please try again later or install manually."
    }
}
# - GIMP
$gimpToolName = "GIMP"
$gimpToolDetails = Get-ToolDetails -toolsList $toolsList -toolName $gimpToolName
$gimpAlias = $gimpToolDetails.alias
$gimpExecutablePath = "C:\Program Files\GIMP 2\bin\gimp-2.10.exe"

# GIMP counts as installed when its executable exists at the expected path
$gimpAlreadyPresent = Test-Path $gimpExecutablePath
if ($gimpAlreadyPresent) {
    Write-Host "GIMP is already installed."
} else {
    # Download the installer to the Temp directory
    $gimpInstallerFilePath = "$env:TEMP\gimp_installer.exe"
    $downloadResult = Invoke-DownloadFileFromAvailableMirrors -mirrorUrls $gimpToolDetails.mirrors -outfile $gimpInstallerFilePath

    if ($downloadResult) {
        # Execute the installer silently with elevated permissions, then delete it
        Start-Process -FilePath $gimpInstallerFilePath -ArgumentList "/VERYSILENT /ALLUSERS" -Verb RunAs -Wait
        Remove-Item -Path $gimpInstallerFilePath

        # Register the alias in the profile and in the running session
        $setAliasExpression = "Set-Alias -Name $gimpAlias -Value `"$gimpExecutablePath`""
        Add-Content -Path $PROFILE -Value $setAliasExpression
        Invoke-Expression $setAliasExpression

        # Add GIMP to the system PATH environment variable
        Add-ToEnvPath -NewPath "C:\Program Files\GIMP 2\bin"
    } else {
        Write-Host "Failed to download GIMP. Please try again later or install manually."
    }
}
# - VS Code
$vsCodeToolName = "VS Code"
$vsCodeToolDetails = Get-ToolDetails -toolsList $toolsList -toolName $vsCodeToolName
# Use VS Code's own alias (this previously read the GIMP entry, which pointed
# the GIMP alias at Code.exe)
$vsCodeAlias = $vsCodeToolDetails.alias
$vsCodeExecutablePath = "C:\Users\$env:USERNAME\AppData\Local\Programs\Microsoft VS Code\Code.exe"

# Check if VS Code is already installed by checking the VS Code executable path
if (Test-Path $vsCodeExecutablePath) {
    Write-Host "VS Code is already installed."
} else {
    # Download the installer to the Temp directory
    $vsCodeInstallerFilePath = "$env:TEMP\VSCodeSetup.exe"
    $downloadResult = Invoke-DownloadFileFromAvailableMirrors -mirrorUrls $vsCodeToolDetails.mirrors -outfile $vsCodeInstallerFilePath
    if (-not $downloadResult) {
        Write-Host "Failed to download VS Code. Please try again later or install manually."
    } else {
        # Execute the installer silently with elevated permissions
        Start-Process -FilePath $vsCodeInstallerFilePath -ArgumentList "/VERYSILENT", "/mergetasks=!runcode" -Verb RunAs -Wait
        # Remove the installer file after installation
        Remove-Item -Path $vsCodeInstallerFilePath

        # Set alias
        $setAliasExpression = "Set-Alias -Name $vsCodeAlias -Value `"$vsCodeExecutablePath`""
        Add-Content -Path $PROFILE -Value $setAliasExpression
        Invoke-Expression $setAliasExpression

        # Add VS Code to the system PATH environment variable
        Add-ToEnvPath -NewPath "C:\Users\$env:USERNAME\AppData\Local\Programs\Microsoft VS Code\bin"

        # Disable Visual Studio Code Auto Updates
        $vsCodeSettingsPath = "${env:APPDATA}\Code\User\settings.json"
        if (-not (Test-Path $vsCodeSettingsPath)) {
            # Create the directory if it doesn't exist
            $dirPath = Split-Path -Path $vsCodeSettingsPath -Parent
            if (-not (Test-Path $dirPath)) {
                New-Item -ItemType Directory -Path $dirPath -Force
            }
            # Initialize an empty hashtable to act as the JSON object
            $settingsObj = @{}
            $settingsObj["update.mode"] = "none" # Set update mode to none
            $settingsObj | ConvertTo-Json | Set-Content $vsCodeSettingsPath
        } else {
            # If the file exists, modify it. ConvertFrom-Json yields a custom
            # object that cannot be indexed like a hashtable, so the property is
            # added (or overwritten) with Add-Member. -Depth keeps nested
            # settings from being truncated when the file is written back.
            try {
                $settingsObj = Get-Content $vsCodeSettingsPath -Raw | ConvertFrom-Json -ErrorAction Stop
                if ($null -eq $settingsObj) {
                    $settingsObj = New-Object PSObject
                }
                $settingsObj | Add-Member -NotePropertyName "update.mode" -NotePropertyValue "none" -Force
                $settingsObj | ConvertTo-Json -Depth 32 | Set-Content $vsCodeSettingsPath
            } catch {
                # Leave an unparseable settings file untouched rather than overwrite it
                Write-Host "Could not update $vsCodeSettingsPath. Error: $($_.Exception.Message)"
            }
        }
    }
}
# - Thunderbird
$thunderbirdToolName = "Thunderbird"
$thunderbirdToolDetails = Get-ToolDetails -toolsList $toolsList -toolName $thunderbirdToolName
$thunderbirdAlias = $thunderbirdToolDetails.alias
$thunderbirdExecutablePath = "C:\Program Files\Mozilla Thunderbird\thunderbird.exe"

# Thunderbird counts as installed when its executable exists at the expected path
$thunderbirdAlreadyPresent = Test-Path $thunderbirdExecutablePath
if ($thunderbirdAlreadyPresent) {
    Write-Host "Thunderbird is already installed."
} else {
    # Download the installer to the Temp directory
    $thunderbirdInstallerFilePath = "$env:TEMP\ThunderbirdSetup.exe"
    $downloadResult = Invoke-DownloadFileFromAvailableMirrors -mirrorUrls $thunderbirdToolDetails.mirrors -outfile $thunderbirdInstallerFilePath

    if ($downloadResult) {
        # Execute the installer silently with elevated permissions, then delete it
        Start-Process -FilePath $thunderbirdInstallerFilePath -ArgumentList "/S" -Verb RunAs -Wait
        Remove-Item -Path $thunderbirdInstallerFilePath

        # Register the alias in the profile and in the running session
        $setAliasExpression = "Set-Alias -Name $thunderbirdAlias -Value `"$thunderbirdExecutablePath`""
        Add-Content -Path $PROFILE -Value $setAliasExpression
        Invoke-Expression $setAliasExpression

        # Add Thunderbird to the system PATH environment variable
        Add-ToEnvPath -NewPath "C:\Program Files\Mozilla Thunderbird"
    } else {
        Write-Host "Failed to download Thunderbird. Please try again later or install manually."
    }
}
# - Server Setup
$pythonServerPort = 5000
$onLogonTaskName = "Server_OnLogon"
$requirementsFile = "$scriptFolder\server\requirements.txt"

# Ensure pip is updated to the latest version
Install-PythonPackages -Package "pip" -Arguments "--upgrade"
Install-PythonPackages -Package "wheel"
Install-PythonPackages -Package "pywinauto"

# Install Python packages from requirements.txt using Python's pip module
if (Test-Path $requirementsFile) {
    Write-Host "Installing required Python packages using pip from requirements file..."
    Install-PythonPackages -RequirementsPath $requirementsFile
} else {
    Write-Error "Requirements file not found: $requirementsFile"
    exit
}

# Add a firewall rule to allow incoming connections on the specified port for the Python executable.
# The rule is created with -DisplayName only, so it must be looked up by display
# name as well; a lookup by -Name never matches and a duplicate rule would be
# added on every run.
$pythonServerRuleName = "PythonHTTPServer-$pythonServerPort"
if (-not (Get-NetFirewallRule -DisplayName $pythonServerRuleName -ErrorAction SilentlyContinue)) {
    New-NetFirewallRule -DisplayName $pythonServerRuleName -Direction Inbound -Program $pythonExecutablePath -Protocol TCP -LocalPort $pythonServerPort -Action Allow -Profile Any
    Write-Host "Firewall rule added to allow traffic on port $pythonServerPort for Python"
} else {
    Write-Host "Firewall rule already exists. $pythonServerRuleName "
}

$onLogonScriptPath = "$scriptFolder\on-logon.ps1"
# Register the logon task only when no task with that name exists yet
if (Get-ScheduledTask -TaskName $onLogonTaskName -ErrorAction SilentlyContinue) {
    Write-Host "Scheduled task $onLogonTaskName already exists."
} else {
    Write-Host "Registering new task $onLogonTaskName..."
    Register-LogonTask -TaskName $onLogonTaskName -ScriptPath $onLogonScriptPath -LocalUser "Docker"
}

# Wait briefly, then start the task right away instead of waiting for the next logon
Start-Sleep -Seconds 10
Start-ScheduledTask -TaskName $onLogonTaskName
\ No newline at end of file
{
"Python": {
"mirrors": [
"https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe"
],
"alias": "python"
},
"git": {
"mirrors": [
"https://github.com/git-for-windows/git/releases/download/v2.37.1.windows.1/Git-2.37.1-64-bit.exe"
]
},
"7zip": {
"mirrors": [
"https://www.7-zip.org/a/7z2407-x64.exe"
]
},
"ffmpeg": {
"mirrors": [
"https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.7z"
]
},
"Google Chrome": {
"mirrors": [
"https://dl.google.com/chrome/install/latest/chrome_installer.exe"
],
"alias": "google-chrome"
},
"LibreOffice": {
"mirrors": [
"https://mirror.raiolanetworks.com/tdf/libreoffice/stable/24.8.4/win/x86_64/LibreOffice_24.8.4_Win_x86-64.msi",
"https://mirrors.iu13.net/tdf/libreoffice/stable/24.8.4/win/x86_64/LibreOffice_24.8.4_Win_x86-64.msi",
"https://download.documentfoundation.org/libreoffice/stable/24.8.4/win/x86_64/LibreOffice_24.8.4_Win_x86-64.msi"
]
},
"VLC": {
"mirrors": [
"https://ftp.free.org/mirrors/videolan/vlc/3.0.21/win64/vlc-3.0.21-win64.exe",
"https://mirror.fcix.net/videolan-ftp/vlc/3.0.21/win64/vlc-3.0.21-win64.exe",
"https://mirror.raiolanetworks.com/videolan/vlc/3.0.21/win64/vlc-3.0.21-win64.exe"
],
"alias": "vlc"
},
"GIMP": {
"mirrors": [
"https://www-ftp.lip6.fr/pub/gimp/gimp/v2.10/windows/gimp-2.10.38-setup.exe",
"https://download.gimp.org/gimp/v2.10/windows/gimp-2.10.38-setup.exe",
"https://www-ftp.lip6.fr/pub/gimp/gimp/v2.10/windows/gimp-2.10.0-setup.exe"
],
"alias": "gimp"
},
"VS Code": {
"mirrors": [
"https://update.code.visualstudio.com/latest/win32-x64-user/stable"
],
"alias": "code"
},
"Thunderbird": {
"mirrors": [
"https://download-installer.cdn.mozilla.net/pub/thunderbird/releases/115.12.1/win64/en-US/Thunderbird%20Setup%20115.12.1.exe",
"https://archive.mozilla.org/pub/thunderbird/releases/115.12.1/win64/en-US/Thunderbird%20Setup%20115.12.1.exe"
],
"alias": "thunderbird"
},
"Caddy Proxy": {
"mirrors": [
"https://caddyserver.com/api/download?os=windows&arch=amd64"
],
"alias": "caddy"
}
}
\ No newline at end of file
'''
python -m omniparserserver --som_model_path ../../weights/icon_detect/model.pt --caption_model_name florence2 --caption_model_path ../../weights/icon_caption_florence --device cuda --BOX_TRESHOLD 0.05
'''
import sys
import os
import time
from fastapi import FastAPI
from pydantic import BaseModel
import argparse
import uvicorn
root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(root_dir)
from util.omniparser import Omniparser
def parse_arguments():
    """Parse the server's command-line options.

    Returns:
        argparse.Namespace with som_model_path, caption_model_name,
        caption_model_path, device, BOX_TRESHOLD, host and port.
    """
    cli = argparse.ArgumentParser(description='Omniparser API')

    # (flag, value type, default, help text) for every supported option
    option_table = (
        ('--som_model_path', str, '../../weights/icon_detect/model.pt', 'Path to the som model'),
        ('--caption_model_name', str, 'florence2', 'Name of the caption model'),
        ('--caption_model_path', str, '../../weights/icon_caption_florence', 'Path to the caption model'),
        ('--device', str, 'cpu', 'Device to run the model'),
        ('--BOX_TRESHOLD', float, 0.05, 'Threshold for box detection'),
        ('--host', str, '0.0.0.0', 'Host for the API'),
        ('--port', int, 8000, 'Port for the API'),
    )
    for flag, value_type, default_value, help_text in option_table:
        cli.add_argument(flag, type=value_type, default=default_value, help=help_text)

    return cli.parse_args()
# Module-level wiring, executed at import time: parse the command line, expose
# the options as a plain dict, create the FastAPI app and build the Omniparser
# instance from that dict.
args = parse_arguments()
config = vars(args)
app = FastAPI()
omniparser = Omniparser(config)
# Request body accepted by the POST /parse/ endpoint.
class ParseRequest(BaseModel):
    # Passed unchanged to omniparser.parse; presumably a base64-encoded
    # screenshot - confirm against the Omniparser implementation.
    base64_image: str
@app.post("/parse/")
async def parse(parse_request: ParseRequest):
    # Run the parser on the submitted image and report how long the call took.
    print('start parsing...')
    started_at = time.time()
    labeled_image, content_list = omniparser.parse(parse_request.base64_image)
    latency = time.time() - started_at
    print('time:', latency)

    # Response: the labeled image, the parsed elements and the elapsed seconds
    return {
        "som_image_base64": labeled_image,
        "parsed_content_list": content_list,
        'latency': latency,
    }
@app.get("/probe/")
async def root():
    # Readiness probe: always answers with a fixed message.
    ready_message = {"message": "Omniparser API ready"}
    return ready_message
if __name__ == "__main__":
    # Script entry point: serve the app by import string with auto-reload on,
    # bound to the host and port given on the command line.
    uvicorn.run(
        "omniparserserver:app",
        host=args.host,
        port=args.port,
        reload=True,
    )
\ No newline at end of file
<img src="../imgs/header_bar.png" alt="OmniTool Header" width="100%">
# OmniTool
Control a Windows 11 VM with OmniParser + your vision model of choice.
## Highlights:
1. **OmniParser V2** is 60% faster than V1 and now understands a wide variety of OS, app and inside app icons!
2. **OmniBox** uses 50% less disk space than other Windows VMs for agent testing, whilst providing the same computer use API
3. **OmniTool** supports out of the box the following vision models - OpenAI (4o/o1/o3-mini), DeepSeek (R1), Qwen (2.5VL) or Anthropic Computer Use
## Overview
There are three components:
<table style="border-collapse: collapse; border: none;">
<tr>
<td style="border: none;"><img src="../imgs/omniparsericon.png" width="50"></td>
<td style="border: none;"><strong>omniparserserver</strong></td>
<td style="border: none;">FastAPI server running OmniParser V2.</td>
</tr>
<tr>
<td style="border: none;"><img src="../imgs/omniboxicon.png" width="50"></td>
<td style="border: none;"><strong>omnibox</strong></td>
<td style="border: none;">A Windows 11 VM running in a Docker container.</td>
</tr>
<tr>
<td style="border: none;"><img src="../imgs/gradioicon.png" width="50"></td>
<td style="border: none;"><strong>gradio</strong></td>
<td style="border: none;">UI to provide commands and watch reasoning + execution on OmniBox</td>
</tr>
</table>
## Showcase Video
| OmniParser V2 | [Watch Video](https://1drv.ms/v/c/650b027c18d5a573/EWXbVESKWo9Buu6OYCwg06wBeoM97C6EOTG6RjvWLEN1Qg?e=alnHGC) |
|--------------|------------------------------------------------------------------|
| OmniTool | [Watch Video](https://1drv.ms/v/c/650b027c18d5a573/EehZ7RzY69ZHn-MeQHrnnR4BCj3by-cLLpUVlxMjF4O65Q?e=8LxMgX) |
## Notes:
1. Though **OmniParser V2** can run on a CPU, we have separated this out if you want to run it fast on a GPU machine
2. The **OmniBox** Windows 11 VM docker is dependent on KVM so can only run quickly on Windows and Linux. This can run on a CPU machine (doesn't need GPU).
3. The Gradio UI can also run on a CPU machine. We suggest running **omnibox** and **gradio** on the same CPU machine and **omniparserserver** on a GPU server.
## Setup
1. **omniparserserver**:
a. If you already have a conda environment for OmniParser, you can use that. Else follow the following steps to create one
b. Ensure conda is installed with `conda --version` or install from the [Anaconda website](https://www.anaconda.com/download/success)
c. Navigate to the root of the repo with `cd OmniParser`
d. Create a conda python environment with `conda create -n "omni" python==3.12`
e. Set the python environment to be used with `conda activate omni`
f. Install the dependencies with `pip install -r requirements.txt`
g. Continue from here if you already had the conda environment.
h. Ensure you have the V2 weights downloaded in weights folder (**ensure caption weights folder is called icon_caption_florence**). If not download them with:
```
rm -rf weights/icon_detect weights/icon_caption weights/icon_caption_florence
for folder in icon_caption icon_detect; do huggingface-cli download microsoft/OmniParser-v2.0 --local-dir weights --repo-type model --include "$folder/*"; done
mv weights/icon_caption weights/icon_caption_florence
```
i. Navigate to the server directory with `cd OmniParser/omnitool/omniparserserver`
j. Start the server with `python -m omniparserserver`
2. **omnibox**:
a. Install Docker Desktop
b. Visit [Microsoft Evaluation Center](https://info.microsoft.com/ww-landing-windows-11-enterprise.html), accept the Terms of Service, and download a **Windows 11 Enterprise Evaluation (90-day trial, English, United States)** ISO file [~6GB]. Rename the file to `custom.iso` and copy it to the directory `OmniParser/omnitool/omnibox/vm/win11iso`
c. Navigate to the VM management script directory with `cd OmniParser/omnitool/omnibox/scripts`
d. Build the docker container [400MB] and install the ISO to a storage folder [20GB] with `./manage_vm.sh create`
e. After creating the first time it will store a save of the VM state in `vm/win11storage`. You can then manage the VM with `./manage_vm.sh start` and `./manage_vm.sh stop`. To delete the VM, use `./manage_vm.sh delete` and delete the `OmniParser/omnitool/omnibox/vm/win11storage` directory.
3. **gradio**:
a. Navigate to the gradio directory with `cd OmniParser/omnitool/gradio`
b. Ensure you have activated the conda python environment with `conda activate omni`
c. Start the server with `python app.py --windows_host_url localhost:8006 --omniparser_server_url localhost:8000`
d. Open the URL in the terminal output, set your API Key and start playing with the AI agent!
## Risks and Mitigations
To align with the Microsoft AI principles and Responsible AI practices, we conduct risk mitigation by training the icon caption model with Responsible AI data, which helps the model avoid, as much as possible, inferring sensitive attributes (e.g. race, religion, etc.) of individuals who happen to appear in icon images. At the same time, we encourage users to apply OmniParser only to screenshots that do not contain harmful or violent content. For OmniTool, we conducted a threat model analysis using the Microsoft Threat Modeling Tool. We advise keeping a human in the loop in order to minimize risk.
## Acknowledgment
Kudos to the amazing resources that are invaluable in the development of our code: [Claude Computer Use](https://github.com/anthropics/anthropic-quickstarts/blob/main/computer-use-demo/README.md), [OS World](https://github.com/xlang-ai/OSWorld), [Windows Agent Arena](https://github.com/microsoft/WindowsAgentArena), and [computer_use_ootb](https://github.com/showlab/computer_use_ootb).
We are grateful for the helpful suggestions and feedback provided by Francesco Bonacci, Jianwei Yang, Dillon DuPont, Yue Wu, Anh Nguyen.
# torch
easyocr
# torchvision
supervision==0.18.0
openai==1.3.5
# transformers
ultralytics==8.3.70
# azure-identity
# numpy==1.26.4
# opencv-python
# opencv-python-headless
gradio
dill
# accelerate
timm
einops==0.8.0
# paddlepaddle
paddleocr
ruff==0.6.7
pre-commit==3.8.0
pytest==8.3.3
pytest-asyncio==0.23.6
pyautogui==0.9.54
streamlit>=1.38.0
anthropic[bedrock,vertex]>=0.37.1
jsonschema==4.22.0
boto3>=1.28.57
google-auth<3,>=2
screeninfo
uiautomation
dashscope
groq
jupyter
\ No newline at end of file
from typing import List, Optional, Union, Tuple
import cv2
import numpy as np
from supervision.detection.core import Detections
from supervision.draw.color import Color, ColorPalette
class BoxAnnotator:
"""
A class for drawing bounding boxes on an image using detections provided.
Attributes:
color (Union[Color, ColorPalette]): The color to draw the bounding box,
can be a single color or a color palette
thickness (int): The thickness of the bounding box lines, default is 3
text_color (Color): The color of the text on the bounding box, default is black
text_scale (float): The scale of the text on the bounding box, default is 0.5
text_thickness (int): The thickness of the text on the bounding box,
default is 2
text_padding (int): The padding around the text on the bounding box,
default is 10
avoid_overlap (bool): Flag stored by the constructor, default is True
"""
def __init__(
self,
color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
thickness: int = 3, # 1 for seeclick 2 for mind2web and 3 for demo
text_color: Color = Color.BLACK,
text_scale: float = 0.5, # 0.8 for mobile/web, 0.3 for desktop # 0.4 for mind2web
text_thickness: int = 2, #1, # 2 for demo
text_padding: int = 10,
avoid_overlap: bool = True,
):
self.color: Union[Color, ColorPalette] = color
self.thickness: int = thickness
self.text_color: Color = text_color
self.text_scale: float = text_scale
self.text_thickness: int = text_thickness
self.text_padding: int = text_padding
self.avoid_overlap: bool = avoid_overlap
def annotate(
self,
scene: np.ndarray,
detections: Detections,
labels: Optional[List[str]] = None,
skip_label: bool = False,
image_size: Optional[Tuple[int, int]] = None,
) -> np.ndarray:
"""
Draws bounding boxes on the frame using the detections provided.
Args:
scene (np.ndarray): The image on which the bounding boxes will be drawn
detections (Detections): The detections for which the
bounding boxes will be drawn
labels (Optional[List[str]]): An optional list of labels
corresponding to each detection. If `labels` are not provided,
corresponding `class_id` will be used as label.
skip_label (bool): Is set to `True`, skips bounding box label annotation.
Returns:
np.ndarray: The image with the bounding boxes drawn on it
Example:
```python
import supervision as sv
classes = ['person', ...]
image = ...
detections = sv.Detections(...)
box_annotator = sv.BoxAnnotator()
labels = [
f"{classes[class_id]} {confidence:0.2f}"
for _, _, confidence, class_id, _ in detections
]
annotated_frame = box_annotator.annotate(
scene=image.copy(),
detections=detections,
labels=labels
)
```
"""
font = cv2.FONT_HERSHEY_SIMPLEX
for i in range(len(detections)):
x1, y1, x2, y2 = detections.xyxy[i].astype(int)
class_id = (
detections.class_id[i] if detections.class_id is not None else None
)
idx = class_id if class_id is not None else i
color = (
self.color.by_idx(idx)
if isinstance(self.color, ColorPalette)
else self.color
)
cv2.rectangle(
img=scene,
pt1=(x1, y1),
pt2=(x2, y2),
color=color.as_bgr(),
thickness=self.thickness,
)
if skip_label:
continue
text = (
f"{class_id}"
if (labels is None or len(detections) != len(labels))
else labels[i]
)
text_width, text_height = cv2.getTextSize(
text=text,
fontFace=font,
fontScale=self.text_scale,
thickness=self.text_thickness,
)[0]
if not self.avoid_overlap:
text_x = x1 + self.text_padding
text_y = y1 - self.text_padding
text_background_x1 = x1
text_background_y1 = y1 - 2 * self.text_padding - text_height
text_background_x2 = x1 + 2 * self.text_padding + text_width
text_background_y2 = y1
# text_x = x1 - self.text_padding - text_width
# text_y = y1 + self.text_padding + text_height
# text_background_x1 = x1 - 2 * self.text_padding - text_width
# text_background_y1 = y1
# text_background_x2 = x1
# text_background_y2 = y1 + 2 * self.text_padding + text_height
else:
text_x, text_y, text_background_x1, text_background_y1, text_background_x2, text_background_y2 = get_optimal_label_pos(self.text_padding, text_width, text_height, x1, y1, x2, y2, detections, image_size)
cv2.rectangle(
img=scene,
pt1=(text_background_x1, text_background_y1),
pt2=(text_background_x2, text_background_y2),
color=color.as_bgr(),
thickness=cv2.FILLED,
)
# import pdb; pdb.set_trace()
box_color = color.as_rgb()
luminance = 0.299 * box_color[0] + 0.587 * box_color[1] + 0.114 * box_color[2]
text_color = (0,0,0) if luminance > 160 else (255,255,255)
cv2.putText(
img=scene,
text=text,
org=(text_x, text_y),
fontFace=font,
fontScale=self.text_scale,
# color=self.text_color.as_rgb(),
color=text_color,
thickness=self.text_thickness,
lineType=cv2.LINE_AA,
)
return scene
def box_area(box):
    """Return the area of a box whose first four items are (x1, y1, x2, y2).

    No clamping is applied: the result is simply width times height, so a box
    that is inverted on one axis yields a negative value.
    """
    width = box[2] - box[0]
    height = box[3] - box[1]
    return width * height
def intersection_area(box1, box2):
    """Return the overlapping area of two (x1, y1, x2, y2) boxes.

    The overlap extent on each axis is clamped at zero, so boxes that are
    disjoint or merely touch produce 0.
    """
    overlap_w = min(box1[2], box2[2]) - max(box1[0], box2[0])
    overlap_h = min(box1[3], box2[3]) - max(box1[1], box2[1])
    return max(0, overlap_w) * max(0, overlap_h)
def IoU(box1, box2, return_max=True):
    """Compute an overlap score between two (x1, y1, x2, y2) boxes.

    Args:
        box1: First box.
        box2: Second box.
        return_max: When True (default), return the largest of the plain
            intersection-over-union and the two "intersection over own area"
            ratios, so a small box fully contained in a large one still scores
            high. When False, return the plain intersection-over-union.

    Returns:
        The overlap score. If the union area is not positive (e.g. both boxes
        are degenerate), 0.0 is returned instead of dividing by zero.
    """
    intersection = intersection_area(box1, box2)
    area1 = box_area(box1)
    area2 = box_area(box2)
    union = area1 + area2 - intersection

    # Degenerate boxes give an empty union; there is no overlap to report and
    # dividing would raise ZeroDivisionError.
    if union <= 0:
        return 0.0

    iou = intersection / union
    if not return_max:
        return iou

    # Containment ratios are only meaningful when both boxes have real area.
    if area1 > 0 and area2 > 0:
        return max(iou, intersection / area1, intersection / area2)
    return max(iou, 0, 0)
def get_optimal_label_pos(text_padding, text_width, text_height, x1, y1, x2, y2, detections, image_size):
    """Choose where to draw a label for the box (x1, y1, x2, y2).

    Four candidate placements are tried in this order: 'top left',
    'outer left', 'outer right', 'top right'. The first candidate whose
    background rectangle neither overlaps a detection box (overlap score from
    `IoU` above 0.3) nor leaves the image is returned. If every candidate is
    rejected, the last one tried ('top right') is returned anyway.

    Args:
        text_padding: Padding added around the text on every side.
        text_width: Width of the rendered text.
        text_height: Height of the rendered text.
        x1, y1, x2, y2: Corners of the box being labelled.
        detections: Object supporting `len()` and `.xyxy[i].astype(int)`; every
            entry is checked for overlap with the label background.
        image_size: `(width, height)` bounds of the image, or None to skip the
            out-of-image check.

    Returns:
        Tuple `(text_x, text_y, text_background_x1, text_background_y1,
        text_background_x2, text_background_y2)`.
    """
    overlap_threshold = 0.3
    bg_width = 2 * text_padding + text_width
    bg_height = 2 * text_padding + text_height

    def get_is_overlap(bg_x1, bg_y1, bg_x2, bg_y2):
        # A label that sticks out of the image counts as blocked. Without
        # known bounds this check is skipped instead of failing on None.
        if image_size is not None and (
            bg_x1 < 0 or bg_x2 > image_size[0] or bg_y1 < 0 or bg_y2 > image_size[1]
        ):
            return True
        label_box = [bg_x1, bg_y1, bg_x2, bg_y2]
        for i in range(len(detections)):
            if IoU(label_box, detections.xyxy[i].astype(int)) > overlap_threshold:
                return True
        return False

    # Each candidate: (text_x, text_y, bg_x1, bg_y1, bg_x2, bg_y2).
    candidates = (
        # top left: above the box, aligned with its left edge
        (x1 + text_padding, y1 - text_padding,
         x1, y1 - bg_height, x1 + bg_width, y1),
        # outer left: to the left of the box, hanging from its top edge
        (x1 - text_padding - text_width, y1 + text_padding + text_height,
         x1 - bg_width, y1, x1, y1 + bg_height),
        # outer right: to the right of the box, hanging from its top edge
        (x2 + text_padding, y1 + text_padding + text_height,
         x2, y1, x2 + bg_width, y1 + bg_height),
        # top right: above the box, aligned with its right edge
        (x2 - text_padding - text_width, y1 - text_padding,
         x2 - bg_width, y1 - bg_height, x2, y1),
    )

    for candidate in candidates:
        if not get_is_overlap(*candidate[2:]):
            return candidate
    # Nothing fits cleanly; keep the last candidate tried.
    return candidates[-1]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment