diff --git a/versions/lite/airootfs/fast_install.sh b/versions/lite/airootfs/fast_install.sh
index 31f39dd..36b2540 100644
--- a/versions/lite/airootfs/fast_install.sh
+++ b/versions/lite/airootfs/fast_install.sh
@@ -110,7 +110,8 @@ echo "Entering Chroot Environment"
 cp fast_install_stage2.sh /mnt
-cp environment.sh /mnt
+cp environment.sh /mnt
+cp /usr/local/bin/apt /mnt/usr/bin/apt
 arch-chroot /mnt /fast_install_stage2.sh
diff --git a/versions/lite/profiledef.sh b/versions/lite/profiledef.sh
index 0d0e909..e01cd1c 100644
--- a/versions/lite/profiledef.sh
+++ b/versions/lite/profiledef.sh
@@ -5,7 +5,7 @@ iso_name="SaturnArch"
 iso_label="SATURNARCH_LITE"
 iso_publisher="Lukas Plevac "
 iso_application="SaturnArch Linux"
-iso_version="$(date +%Y.%m.%d)"
+iso_version="lite"
 install_dir="arch"
 bootmodes=('bios.syslinux.mbr' 'bios.syslinux.eltorito' 'uefi-x64.systemd-boot.esp' 'uefi-x64.systemd-boot.eltorito')
 arch="x86_64"
diff --git a/versions/master/airootfs/discover.py b/versions/master/airootfs/discover.py
new file mode 100644
index 0000000..b8f7478
--- /dev/null
+++ b/versions/master/airootfs/discover.py
@@ -0,0 +1,204 @@
+import ipaddress
+import socket
+import json
+import os, sys
+from random import randint
+from time import sleep, time
+
+HOSTS = {}
+UDP_IP = "255.255.255.255"
+UDP_PORT = 5005
+PROT_HDR = "SATURNARCH "
+SEND_TIME = None
+FSTAB = "/etc/fstab"
+
+TYPE = "slave"
+
+# A node is the master when the installer left this marker file behind.
+if os.path.exists("/etc/slurm-llnl/MASTER"):
+    TYPE = "master"
+
+MASTER = None
+
+sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
+sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
+sock.bind((UDP_IP, UDP_PORT))
+
+def nfsDone():
+    """Return True when /etc/fstab already mentions the master's IP."""
+    # The NFS entries appended by the main loop start with the master IP, so
+    # finding it in fstab means the shares were configured on an earlier run.
+    with open(FSTAB) as fstab:
+        return MASTER["ip"] in fstab.read()
+
+def get_ip():
+    """Return the local address used for outbound traffic, or 127.0.0.1."""
+    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+    s.settimeout(0)
+    try:
+        # doesn't even have to be reachable
+        s.connect(('8.8.8.8', 1))
+        IP = s.getsockname()[0]
+    except Exception:
+        IP = '127.0.0.1'
+    finally:
+        s.close()
+    return IP
+
+def selfInfo():
+    """Describe this node in the format that is broadcast to the other nodes."""
+    return {
+        "ip": get_ip(),
+        "type": TYPE,
+        "name": socket.gethostname(),
+        "cpus": os.cpu_count(),
+        "rams": os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')
+    }
+
+
+def loadHosts():
+    """Load the cached host table and work out which node is the master."""
+    # Without this declaration the assignments below would only create locals
+    # and the module-level HOSTS / MASTER would never change.
+    global HOSTS, MASTER
+
+    try:
+        with open('/etc/slurm-llnl/hosts.json', 'r') as file:
+            HOSTS = json.load(file)
+    except (OSError, ValueError):
+        # Missing or corrupt cache: start with an empty host table.
+        HOSTS = {}
+
+    if TYPE == "master":
+        MASTER = selfInfo()
+    else:
+        for host in HOSTS.values():
+            if host["type"] == "master":
+                MASTER = host
+
+def updateHosts():
+    """Write the host table to disk so it survives a restart."""
+    with open("/etc/slurm-llnl/hosts.json", "w") as outfile:
+        json.dump(HOSTS, outfile)
+
+def generateSlurmConfig(source, target):
+    """Render the slurm.conf template *source* into *target*."""
+    hosts = f"NodeName={socket.gethostname()} NodeAddr={get_ip()} CPUs={os.cpu_count()} State=UNKNOWN\n" # first is my self
+    # The compute partitions list every non-master node, this one included.
+    workers = [] if TYPE == "master" else [socket.gethostname()]
+    for host in HOSTS.values():
+        hosts += f"NodeName={host['name']} NodeAddr={host['ip']} CPUs={host['cpus']} State=UNKNOWN\n"
+        if host["type"] != "master":
+            workers.append(host["name"])
+
+    # Node lists must be comma separated without blanks.
+    noMasterHosts = ",".join(workers)
+
+    with open(source) as f:
+        newText=f.read().replace('{%hosts%}', hosts).replace('{%noMasterHosts%}', noMasterHosts).replace('{%masterName%}', MASTER["name"]).replace('{%masterIP%}', MASTER["ip"])
+
+    with open(target, "w") as f:
+        f.write(newText)
+
+def generateHosts(target):
+    """Write an /etc/hosts style file listing this node and all known hosts."""
+    fileStr = """# Auto generated by SaturnArch
+127.0.0.1\tlocalhost
+::1\tlocalhost ip6-localhost ip6-loopback
+ff02::1\tip6-allnodes
+ff02::2\tip6-allrouters
+
+"""
+    fileStr += f"{get_ip()}\t{socket.gethostname()}\n" # first is my self
+    for host in HOSTS.values():
+        fileStr += f"{host['ip']}\t{host['name']}\n"
+
+    with open(target, "w") as outfile:
+        outfile.write(fileStr)
+
+def self_announcement():
+    """Broadcast this node's description to the whole subnet."""
+    MESSAGE = (PROT_HDR + json.dumps(selfInfo())).encode("ASCII")
+    sock.sendto(MESSAGE, (UDP_IP, UDP_PORT))
+
+def parseAnnouncement(raw):
+    """Decode one datagram; return the host dict, or None if it is not valid."""
+    try:
+        text = raw.decode("ASCII")
+        if not text.startswith(PROT_HDR):
+            return None
+        info = json.loads(text[len(PROT_HDR):])
+        # These values are written to /etc/hosts, /etc/fstab and slurm.conf,
+        # so accept only a real IP, a known node type and a plain host name.
+        ipaddress.ip_address(str(info["ip"]))
+        if info["type"] not in ("master", "slave"):
+            return None
+        name = info["name"]
+        if not (name.isascii() and name.replace("-", "").replace(".", "").isalnum()):
+            return None
+        int(info["cpus"])
+    except (ValueError, KeyError, TypeError, AttributeError):
+        return None
+    return info
+
+## Start program
+loadHosts()
+self_announcement()
+
+while True:
+    if SEND_TIME is not None and SEND_TIME < int(time()):
+        print(f"Sending self announcement")
+        self_announcement()
+        SEND_TIME = None
+        sock.settimeout(None)
+
+    try:
+        raw, addr = sock.recvfrom(1024)
+    except socket.timeout:
+        continue
+
+    # Ignore foreign or malformed datagrams instead of crashing the service.
+    data = parseAnnouncement(raw)
+    if data is None:
+        continue
+
+    if data["ip"] == get_ip():
+        continue
+
+    if data["ip"] in HOSTS and data == HOSTS[data["ip"]]:
+        continue
+
+    print(f"Discover new HOST {data}")
+
+    if data["type"] == "master":
+        MASTER = data
+
+    HOSTS[data["ip"]] = data
+    updateHosts()
+    generateHosts("/etc/hosts")
+    # The template needs the master's name and IP, so wait until it is known.
+    if MASTER is not None:
+        generateSlurmConfig("/etc/slurm-llnl/slurm.conf.template", "/etc/slurm-llnl/slurm.conf")
+
+    # configure network disks
+    if TYPE == "slave" and MASTER is not None and not nfsDone():
+        # Append with Python instead of a shell echo so no shell parses the IP.
+        with open(FSTAB, "a") as fstab:
+            fstab.write(f"{MASTER['ip']}:/clusterfs /clusterfs nfs defaults 0 0\n")
+            fstab.write(f"{MASTER['ip']}:/home /home nfs defaults 0 0\n")
+        os.system("mount -a")
+
+        os.system("cp -f /clusterfs/munge.key /etc/munge/munge.key")
+
+    # reset all services
+    os.system("systemctl restart munge")
+    os.system("systemctl restart slurmd")
+
+    if TYPE == "master":
+        os.system("systemctl restart slurmctld")
+
+    # plan next send
+    waitTime = randint(10,100)
+    print(f"Plan self announcement at T+{waitTime}s")
+    SEND_TIME = int(time()) + waitTime
+    sock.settimeout(waitTime / 2)
\ No newline at end of file
diff --git a/versions/master/airootfs/fast_install.sh b/versions/master/airootfs/fast_install.sh
new file mode 100644
index 0000000..c4f966e
--- /dev/null
+++ b/versions/master/airootfs/fast_install.sh
@@ -0,0 +1,134 @@
+USER="user"
+PASSWORD="saturn"
+NET=`ip -br l | awk '$1 !~ "lo|vir|wl" { print $1}'|head -n 1`
+ALL_NICS=`ip -br l | awk '{ print $1}'`
+DISK1=`lsblk -dn |awk '{print $1}'|grep -E "sda|nvme"|head -n 1`
+
+echo
+echo "NOTE: "
+echo " * This script will setup system to use DHCP by default."
+echo " * If you have a single wired NIC on a network with DHCP it should work by default."
+echo " * Same password is the same for root and non root user by default. Change this after install or override."
+echo " * The default selected disk is the first block device found."
+echo " * This installer should support both BIOS and UEFI."
+echo " * Swap is set to zero"
+echo " * Timezone, locale, and keyboard layout are hardcoded. Override if needed."
+echo;echo
+
+echo "Default non-root user: ${USER}"
+echo "Default password: ${PASSWORD}"
+echo
+
+echo "Selected wired interface: "
+echo $NET
+echo "All interfaces found:"
+echo $ALL_NICS
+echo
+
+echo "Selected disk:"
+echo $DISK1
+echo
+
+echo "Disks on system:"
+lsblk -d
+echo
+
+# Disks whose name ends in a digit (nvme0n1, mmcblk0) name partitions <disk>p<N>.
+DISK="/dev/$DISK1"
+case "$DISK" in *[0-9]) PART="${DISK}p" ;; *) PART="${DISK}" ;; esac
+echo export USER=${USER} > environment.sh
+echo export PASSWORD=${PASSWORD} >> environment.sh
+echo export NET=${NET} >> environment.sh
+echo "export ALL_NICS=\"${ALL_NICS}\"" >> environment.sh
+echo export DISK1=${DISK1} >> environment.sh
+echo export DISK=${DISK} >> environment.sh
+
+chmod a+x environment.sh
+
+START=1
+ESP=$(( $START+512 ))
+BIOS_BOOT=$(( $ESP+2 ))
+ROOT=100%
+
+echo
+echo "Wiping Disk"
+
+wipefs -a $DISK
+
+echo
+echo
+echo "Creating Label"
+
+parted -s ${DISK} mklabel gpt
+
+echo
+echo
+echo "Partitioning"
+
+parted -s --align=optimal ${DISK} mkpart ESP fat32 ${START}MiB ${ESP}MiB
+parted -s ${DISK} set 1 esp on
+parted -s --align=optimal ${DISK} mkpart BIOS_BOOT fat32 ${ESP}MiB ${BIOS_BOOT}MiB
+parted -s ${DISK} set 2 bios_grub on
+parted -s --align=optimal ${DISK} mkpart linux ${BIOS_BOOT}MiB 100%
+
+parted -s ${DISK} print
+
+echo
+echo "Formatting Filesystems"
+
+
+mkfs.ext4 -F ${PART}3
+mkfs.fat -F 32 ${PART}1
+
+mount ${PART}3 /mnt
+mkdir -p /mnt/boot/efi
+mount ${PART}1 /mnt/boot/efi
+
+echo
+echo "Pacstrapping System"
+
+#update keys
+echo "update keys"
+gpg --refresh-keys
+pacman-key --init && pacman-key --populate
+pacman-key --refresh-keys
+
+pacstrap -K /mnt base linux linux-firmware
+
+echo
+echo "Generating Filesystem Table"
+
+genfstab -U /mnt >> /mnt/etc/fstab
+
+
+echo
+echo ${PASSWORD}
+echo ${USER}
+echo ${DISK}
+echo
+echo "Entering Chroot Environment"
+
+mkdir -p /mnt/etc/slurm-llnl
+
+cp fast_install_stage2.sh /mnt
+cp environment.sh /mnt
+cp discover.py /mnt/usr/bin/discover.py
+cp -rf slurm/* /mnt/etc/slurm-llnl
+cp saturnDiscover.service /mnt/lib/systemd/system/saturnDiscover.service
+cp /usr/local/bin/apt /mnt/usr/bin/apt
+
+arch-chroot /mnt /fast_install_stage2.sh
+
+echo
+echo "One Last Link"
+
+
+ln -sf /run/systemd/resolve/stub-resolv.conf /mnt/etc/resolv.conf
+
+
+reboot
+
+
+
+
+
diff --git a/versions/master/airootfs/fast_install_stage2.sh b/versions/master/airootfs/fast_install_stage2.sh
new file mode 100644
index 0000000..5ec66ed
--- /dev/null
+++ b/versions/master/airootfs/fast_install_stage2.sh
@@ -0,0 +1,128 @@
+
+echo "Inside Chroot Env"
+echo
+source /environment.sh
+pwd
+echo
+echo ${PASSWORD}
+echo ${USER}
+echo ${DISK}
+
+
+echo "Installing Important Packages"
+
+mkinitcpio -P
+
+#install from lists
+gpg --refresh-keys
+pacman-key --init && pacman-key --populate
+pacman-key --refresh-keys
+pacman -Syy
+
+pacman -S --noconfirm man-pages man-db dnsutils ethtool iputils net-tools iproute2 openssh wget \
+usbutils usb_modeswitch tcpdump smartmontools gnu-netcat mc dosfstools exfat-utils \
+partclone parted partimage gptfdisk iw dialog base-devel vim \
+grub os-prober efivar efibootmgr efitools intel-ucode amd-ucode dmidecode htop nano python slurm-llnl nfs-utils
+
+cd /usr/bin/
+ln -sf vim vi
+
+echo "Setup Timezone and Locale"
+
+ln -sf /usr/share/zoneinfo/Europe/Prague /etc/localtime
+
+hwclock --systohc
+
+echo "en_GB.UTF-8 UTF-8" >> /etc/locale.gen
+locale-gen
+echo "LANG=en_GB.UTF-8" >> /etc/locale.conf
+
+echo "Network Setup"
+
+dmidecode --string system-uuid | cut -c1-8 > /etc/hostname
+
+echo "127.0.0.1 localhost" > /etc/hosts
+echo "::1 localhost" >> /etc/hosts
+
+# for IPv6:
+# DHCP=yes
+
+echo "[Match]" > /etc/systemd/network/20-wired.network
+echo "Name=${NET}" >> /etc/systemd/network/20-wired.network
+echo "" >> /etc/systemd/network/20-wired.network
+echo "[Network]" >> /etc/systemd/network/20-wired.network
+echo "DHCP=ipv4" >> /etc/systemd/network/20-wired.network
+echo "" >> /etc/systemd/network/20-wired.network
+echo "[DHCPv6]" >> /etc/systemd/network/20-wired.network
+echo "UseDomains=true" >> /etc/systemd/network/20-wired.network
+
+echo "Adding Users"
+
+useradd -m -G wheel,users -s /bin/bash ${USER}
+yes ${PASSWORD} | passwd
+yes ${PASSWORD} | passwd ${USER}
+
+echo "Installing GRUB"
+
+mkdir -p /boot/grub
+grub-mkconfig -o /boot/grub/grub.cfg
+grub-install ${DISK}
+
+
+echo "Enabling services"
+systemctl enable systemd-networkd
+systemctl enable systemd-resolved
+systemctl enable systemd-timesyncd
+systemctl enable sshd
+
+timedatectl set-timezone Europe/Prague
+timedatectl set-ntp true
+
+echo "Generating issue"
+
+echo " ___ ,-\`." > /etc/issue
+echo " .-~~ ~~-.,-~ _~ " >> /etc/issue
+echo " #\` \`._- " >> /etc/issue
+echo " .\` _-~. " >> /etc/issue
+echo " | _- | " >> /etc/issue
+echo " \` _-~ ' " >> /etc/issue
+echo " . _-~\`. _-~ .' " >> /etc/issue
+echo " ,-' _,-~\`-__ __-' " >> /etc/issue
+echo ",.-~\` . ~~~ " >> /etc/issue
+echo " " >> /etc/issue
+echo " SaturnArch " >> /etc/issue
+echo >> /etc/issue
+echo "IP Addresses:" >> /etc/issue
+echo " \\4" >> /etc/issue
+echo " \\6" >> /etc/issue
+echo >> /etc/issue
+echo "Default login:" >> /etc/issue
+echo " user: ${USER}" >> /etc/issue
+echo " pass: ${PASSWORD}" >> /etc/issue
+echo >> /etc/issue
+
+# setup cluster
+
+sudo mkdir -p /clusterfs
+sudo chown nobody:nobody /clusterfs
+sudo chmod -R 777 /clusterfs
+
+# todo security check here
+echo "/clusterfs 0.0.0.0/0(rw,sync,no_root_squash,no_subtree_check)" >> /etc/exports
+echo "/home 0.0.0.0/0(rw,sync,no_root_squash,no_subtree_check)" >> /etc/exports
+
+# copy keys
+cp /etc/munge/munge.key /clusterfs
+
+systemctl enable munge
+systemctl enable slurmd
+systemctl enable slurmctld
+systemctl enable nfs-server
+systemctl enable saturnDiscover
+
+touch /etc/slurm-llnl/MASTER
+
+echo "Exiting Chroot Environment"
+
+exit
+
diff --git a/versions/master/airootfs/saturnDiscover.service b/versions/master/airootfs/saturnDiscover.service
new file mode 100644
index 0000000..04fd480
--- /dev/null
+++ b/versions/master/airootfs/saturnDiscover.service
@@ -0,0 +1,16 @@
+[Unit]
+Description=SaturnArch discover Service
+# discover.py broadcasts at start-up, so wait until the network is configured.
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+Type=idle
+Restart=on-failure
+# Back off between restarts so a slow network does not exhaust the start limit.
+RestartSec=5
+User=root
+ExecStart=/usr/bin/python /usr/bin/discover.py
+
+[Install]
+WantedBy=multi-user.target
\ No newline at end of file
diff --git a/versions/master/airootfs/slurm/cgroup.conf b/versions/master/airootfs/slurm/cgroup.conf
new file mode 100644
index 0000000..3e3c63d
--- /dev/null
+++ b/versions/master/airootfs/slurm/cgroup.conf
@@ -0,0 +1,14 @@
+CgroupMountpoint="/sys/fs/cgroup"
+CgroupAutomount=yes
+#CgroupReleaseAgentDir="/etc/slurm/cgroup"
+AllowedDevicesFile="/etc/slurm/cgroup_allowed_devices_file.conf"
+ConstrainCores=no
+#TaskAffinity=no
+ConstrainRAMSpace=yes
+ConstrainSwapSpace=no
+ConstrainDevices=no
+AllowedRamSpace=100
+AllowedSwapSpace=0
+MaxRAMPercent=100
+MaxSwapPercent=100
+MinRAMSpace=30
diff --git a/versions/master/airootfs/slurm/cgroup_allowed_devices_file.conf b/versions/master/airootfs/slurm/cgroup_allowed_devices_file.conf
new file mode 100644
index 0000000..e8681e1
--- /dev/null
+++ b/versions/master/airootfs/slurm/cgroup_allowed_devices_file.conf
@@ -0,0 +1,7 @@
+/dev/null
+/dev/urandom
+/dev/zero
+/dev/sda*
+/dev/cpu/*/*
+/dev/pts/*
+/home/*
diff --git a/versions/master/airootfs/slurm/slurm.conf.template b/versions/master/airootfs/slurm/slurm.conf.template
new file mode 100644
index 0000000..25e9756
--- /dev/null
+++ b/versions/master/airootfs/slurm/slurm.conf.template
@@ -0,0 +1,58 @@
+ClusterName=Betynda
+
+SlurmctldHost={%masterName%}({%masterIP%})
+
+ProctrackType=proctrack/linuxproc
+
+ReturnToService=2
+
+SlurmctldPidFile=/run/slurmctld.pid
+SlurmdPidFile=/run/slurmd.pid
+SlurmdSpoolDir=/var/lib/slurm/slurmd
+StateSaveLocation=/var/lib/slurm/slurmctld
+
+SlurmUser=slurm
+TaskPlugin=task/none
+
+SchedulerType=sched/backfill
+SelectType=select/cons_tres
+SelectTypeParameters=CR_Core
+
+AccountingStorageType=accounting_storage/none
+JobCompType=jobcomp/none
+JobAcctGatherType=jobacct_gather/none
+
+SlurmctldDebug=info
+SlurmctldLogFile=/var/log/slurm/slurmctld.log
+
+SlurmdDebug=info
+SlurmdLogFile=/var/log/slurm/slurmd.log
+
+{%hosts%}
+
+PartitionName=exp Nodes={%noMasterHosts%} Default=YES MaxTime=01:00:00 State=UP SuspendTime=3600 PriorityTier=100
+PartitionName=long Nodes={%noMasterHosts%} Default=NO MaxTime=168:00:00 State=UP SuspendTime=3600 PriorityTier=50
+PartitionName=debug Nodes=ALL Default=NO MaxTime=03:00:00 State=UP PriorityTier=150
+
+##
+## Power saving
+##
+
+# Timeout for powering a node on
+ResumeTimeout=600
+
+# Timeout for powering a node off
+SuspendTimeout=120
+
+# Power at most 1 node up and 1 node down per minute
+ResumeRate=1
+SuspendRate=1
+
+# Programs that power nodes on and off
+ResumeProgram=/usr/local/bin/slurmResume
+SuspendProgram=/usr/local/bin/slurmSuspend
+
+TreeWidth=1000
+
+# Wait for reserved nodes to power on (verify the option names against the slurm.conf docs)
+SchedulerParameters=salloc_wait_nodes,sbatch_wait_nodes
diff --git a/versions/master/airootfs/slurmResume b/versions/master/airootfs/slurmResume
new file mode 100755
index 0000000..07923b3
--- /dev/null
+++ b/versions/master/airootfs/slurmResume
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+echo "`date` User $USER invoked Resume $*" >>/var/log/slurm/power_save.log
+
+sudo etherwake b0:83:fe:d8:a6:e0
diff --git a/versions/master/airootfs/slurmSuspend b/versions/master/airootfs/slurmSuspend
new file mode 100755
index 0000000..8d6d36a
--- /dev/null
+++ b/versions/master/airootfs/slurmSuspend
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+echo "`date` User $USER invoked Suspend $*" >>/var/log/slurm/power_save.log
+
+sshpass -p 4126 ssh -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" -t lukasplevac@10.0.0.101 "sudo /sbin/shutdown"
diff --git a/versions/master/profiledef.sh b/versions/master/profiledef.sh
new file mode 100644
index 0000000..fb75131
--- /dev/null
+++ b/versions/master/profiledef.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# shellcheck disable=SC2034
+
+iso_name="SaturnArch"
+iso_label="SATURNARCH_MASTER"
+iso_publisher="Lukas Plevac "
+iso_application="SaturnArch Linux"
+iso_version="master"
+install_dir="arch"
+bootmodes=('bios.syslinux.mbr' 'bios.syslinux.eltorito' 'uefi-x64.systemd-boot.esp' 'uefi-x64.systemd-boot.eltorito')
+arch="x86_64"
+pacman_conf="pacman.conf"
+airootfs_image_tool_options=('-comp' 'xz' '-Xbcj' 'x86' '-b' '1M' '-Xdict-size' '1M')
+file_permissions=(
+  ["/etc/shadow"]="0:0:400"
+  ["/usr/local/bin/apt"]="1:1:1"
+  ["/root/fast_install.sh"]="1:1:1"
+  ["/root/fast_install_stage2.sh"]="1:1:1"
+)
diff --git a/versions/slave/airootfs/discover.py b/versions/slave/airootfs/discover.py
new file mode 100644
index 0000000..b8f7478
--- /dev/null
+++ b/versions/slave/airootfs/discover.py
@@ -0,0 +1,204 @@
+import ipaddress
+import socket
+import json
+import os, sys
+from random import randint
+from time import sleep, time
+
+HOSTS = {}
+UDP_IP = "255.255.255.255"
+UDP_PORT = 5005
+PROT_HDR = "SATURNARCH "
+SEND_TIME = None
+FSTAB = "/etc/fstab"
+
+TYPE = "slave"
+
+# A node is the master when the installer left this marker file behind.
+if os.path.exists("/etc/slurm-llnl/MASTER"):
+    TYPE = "master"
+
+MASTER = None
+
+sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
+sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
+sock.bind((UDP_IP, UDP_PORT))
+
+def nfsDone():
+    """Return True when /etc/fstab already mentions the master's IP."""
+    # The NFS entries appended by the main loop start with the master IP, so
+    # finding it in fstab means the shares were configured on an earlier run.
+    with open(FSTAB) as fstab:
+        return MASTER["ip"] in fstab.read()
+
+def get_ip():
+    """Return the local address used for outbound traffic, or 127.0.0.1."""
+    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+    s.settimeout(0)
+    try:
+        # doesn't even have to be reachable
+        s.connect(('8.8.8.8', 1))
+        IP = s.getsockname()[0]
+    except Exception:
+        IP = '127.0.0.1'
+    finally:
+        s.close()
+    return IP
+
+def selfInfo():
+    """Describe this node in the format that is broadcast to the other nodes."""
+    return {
+        "ip": get_ip(),
+        "type": TYPE,
+        "name": socket.gethostname(),
+        "cpus": os.cpu_count(),
+        "rams": os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')
+    }
+
+
+def loadHosts():
+    """Load the cached host table and work out which node is the master."""
+    # Without this declaration the assignments below would only create locals
+    # and the module-level HOSTS / MASTER would never change.
+    global HOSTS, MASTER
+
+    try:
+        with open('/etc/slurm-llnl/hosts.json', 'r') as file:
+            HOSTS = json.load(file)
+    except (OSError, ValueError):
+        # Missing or corrupt cache: start with an empty host table.
+        HOSTS = {}
+
+    if TYPE == "master":
+        MASTER = selfInfo()
+    else:
+        for host in HOSTS.values():
+            if host["type"] == "master":
+                MASTER = host
+
+def updateHosts():
+    """Write the host table to disk so it survives a restart."""
+    with open("/etc/slurm-llnl/hosts.json", "w") as outfile:
+        json.dump(HOSTS, outfile)
+
+def generateSlurmConfig(source, target):
+    """Render the slurm.conf template *source* into *target*."""
+    hosts = f"NodeName={socket.gethostname()} NodeAddr={get_ip()} CPUs={os.cpu_count()} State=UNKNOWN\n" # first is my self
+    # The compute partitions list every non-master node, this one included.
+    workers = [] if TYPE == "master" else [socket.gethostname()]
+    for host in HOSTS.values():
+        hosts += f"NodeName={host['name']} NodeAddr={host['ip']} CPUs={host['cpus']} State=UNKNOWN\n"
+        if host["type"] != "master":
+            workers.append(host["name"])
+
+    # Node lists must be comma separated without blanks.
+    noMasterHosts = ",".join(workers)
+
+    with open(source) as f:
+        newText=f.read().replace('{%hosts%}', hosts).replace('{%noMasterHosts%}', noMasterHosts).replace('{%masterName%}', MASTER["name"]).replace('{%masterIP%}', MASTER["ip"])
+
+    with open(target, "w") as f:
+        f.write(newText)
+
+def generateHosts(target):
+    """Write an /etc/hosts style file listing this node and all known hosts."""
+    fileStr = """# Auto generated by SaturnArch
+127.0.0.1\tlocalhost
+::1\tlocalhost ip6-localhost ip6-loopback
+ff02::1\tip6-allnodes
+ff02::2\tip6-allrouters
+
+"""
+    fileStr += f"{get_ip()}\t{socket.gethostname()}\n" # first is my self
+    for host in HOSTS.values():
+        fileStr += f"{host['ip']}\t{host['name']}\n"
+
+    with open(target, "w") as outfile:
+        outfile.write(fileStr)
+
+def self_announcement():
+    """Broadcast this node's description to the whole subnet."""
+    MESSAGE = (PROT_HDR + json.dumps(selfInfo())).encode("ASCII")
+    sock.sendto(MESSAGE, (UDP_IP, UDP_PORT))
+
+def parseAnnouncement(raw):
+    """Decode one datagram; return the host dict, or None if it is not valid."""
+    try:
+        text = raw.decode("ASCII")
+        if not text.startswith(PROT_HDR):
+            return None
+        info = json.loads(text[len(PROT_HDR):])
+        # These values are written to /etc/hosts, /etc/fstab and slurm.conf,
+        # so accept only a real IP, a known node type and a plain host name.
+        ipaddress.ip_address(str(info["ip"]))
+        if info["type"] not in ("master", "slave"):
+            return None
+        name = info["name"]
+        if not (name.isascii() and name.replace("-", "").replace(".", "").isalnum()):
+            return None
+        int(info["cpus"])
+    except (ValueError, KeyError, TypeError, AttributeError):
+        return None
+    return info
+
+## Start program
+loadHosts()
+self_announcement()
+
+while True:
+    if SEND_TIME is not None and SEND_TIME < int(time()):
+        print(f"Sending self announcement")
+        self_announcement()
+        SEND_TIME = None
+        sock.settimeout(None)
+
+    try:
+        raw, addr = sock.recvfrom(1024)
+    except socket.timeout:
+        continue
+
+    # Ignore foreign or malformed datagrams instead of crashing the service.
+    data = parseAnnouncement(raw)
+    if data is None:
+        continue
+
+    if data["ip"] == get_ip():
+        continue
+
+    if data["ip"] in HOSTS and data == HOSTS[data["ip"]]:
+        continue
+
+    print(f"Discover new HOST {data}")
+
+    if data["type"] == "master":
+        MASTER = data
+
+    HOSTS[data["ip"]] = data
+    updateHosts()
+    generateHosts("/etc/hosts")
+    # The template needs the master's name and IP, so wait until it is known.
+    if MASTER is not None:
+        generateSlurmConfig("/etc/slurm-llnl/slurm.conf.template", "/etc/slurm-llnl/slurm.conf")
+
+    # configure network disks
+    if TYPE == "slave" and MASTER is not None and not nfsDone():
+        # Append with Python instead of a shell echo so no shell parses the IP.
+        with open(FSTAB, "a") as fstab:
+            fstab.write(f"{MASTER['ip']}:/clusterfs /clusterfs nfs defaults 0 0\n")
+            fstab.write(f"{MASTER['ip']}:/home /home nfs defaults 0 0\n")
+        os.system("mount -a")
+
+        os.system("cp -f /clusterfs/munge.key /etc/munge/munge.key")
+
+    # reset all services
+    os.system("systemctl restart munge")
+    os.system("systemctl restart slurmd")
+
+    if TYPE == "master":
+        os.system("systemctl restart slurmctld")
+
+    # plan next send
+    waitTime = randint(10,100)
+    print(f"Plan self announcement at T+{waitTime}s")
+    SEND_TIME = int(time()) + waitTime
+    sock.settimeout(waitTime / 2)
\ No newline at end of file
diff --git a/versions/slave/airootfs/fast_install.sh b/versions/slave/airootfs/fast_install.sh
new file mode 100644
index 0000000..c4f966e
--- /dev/null
+++ b/versions/slave/airootfs/fast_install.sh
@@ -0,0 +1,134 @@
+USER="user"
+PASSWORD="saturn"
+NET=`ip -br l | awk '$1 !~ "lo|vir|wl" { print $1}'|head -n 1`
+ALL_NICS=`ip -br l | awk '{ print $1}'`
+DISK1=`lsblk -dn |awk '{print $1}'|grep -E "sda|nvme"|head -n 1`
+
+echo
+echo "NOTE: "
+echo " * This script will setup system to use DHCP by default."
+echo " * If you have a single wired NIC on a network with DHCP it should work by default."
+echo " * Same password is the same for root and non root user by default. Change this after install or override."
+echo " * The default selected disk is the first block device found."
+echo " * This installer should support both BIOS and UEFI."
+echo " * Swap is set to zero"
+echo " * Timezone, locale, and keyboard layout are hardcoded. Override if needed."
+echo;echo
+
+echo "Default non-root user: ${USER}"
+echo "Default password: ${PASSWORD}"
+echo
+
+echo "Selected wired interface: "
+echo $NET
+echo "All interfaces found:"
+echo $ALL_NICS
+echo
+
+echo "Selected disk:"
+echo $DISK1
+echo
+
+echo "Disks on system:"
+lsblk -d
+echo
+
+# Disks whose name ends in a digit (nvme0n1, mmcblk0) name partitions <disk>p<N>.
+DISK="/dev/$DISK1"
+case "$DISK" in *[0-9]) PART="${DISK}p" ;; *) PART="${DISK}" ;; esac
+echo export USER=${USER} > environment.sh
+echo export PASSWORD=${PASSWORD} >> environment.sh
+echo export NET=${NET} >> environment.sh
+echo "export ALL_NICS=\"${ALL_NICS}\"" >> environment.sh
+echo export DISK1=${DISK1} >> environment.sh
+echo export DISK=${DISK} >> environment.sh
+
+chmod a+x environment.sh
+
+START=1
+ESP=$(( $START+512 ))
+BIOS_BOOT=$(( $ESP+2 ))
+ROOT=100%
+
+echo
+echo "Wiping Disk"
+
+wipefs -a $DISK
+
+echo
+echo
+echo "Creating Label"
+
+parted -s ${DISK} mklabel gpt
+
+echo
+echo
+echo "Partitioning"
+
+parted -s --align=optimal ${DISK} mkpart ESP fat32 ${START}MiB ${ESP}MiB
+parted -s ${DISK} set 1 esp on
+parted -s --align=optimal ${DISK} mkpart BIOS_BOOT fat32 ${ESP}MiB ${BIOS_BOOT}MiB
+parted -s ${DISK} set 2 bios_grub on
+parted -s --align=optimal ${DISK} mkpart linux ${BIOS_BOOT}MiB 100%
+
+parted -s ${DISK} print
+
+echo
+echo "Formatting Filesystems"
+
+
+mkfs.ext4 -F ${PART}3
+mkfs.fat -F 32 ${PART}1
+
+mount ${PART}3 /mnt
+mkdir -p /mnt/boot/efi
+mount ${PART}1 /mnt/boot/efi
+
+echo
+echo "Pacstrapping System"
+
+#update keys
+echo "update keys"
+gpg --refresh-keys
+pacman-key --init && pacman-key --populate
+pacman-key --refresh-keys
+
+pacstrap -K /mnt base linux linux-firmware
+
+echo
+echo "Generating Filesystem Table"
+
+genfstab -U /mnt >> /mnt/etc/fstab
+
+
+echo
+echo ${PASSWORD}
+echo ${USER}
+echo ${DISK}
+echo
+echo "Entering Chroot Environment"
+
+mkdir -p /mnt/etc/slurm-llnl
+
+cp fast_install_stage2.sh /mnt
+cp environment.sh /mnt
+cp discover.py /mnt/usr/bin/discover.py
+cp -rf slurm/* /mnt/etc/slurm-llnl
+cp saturnDiscover.service /mnt/lib/systemd/system/saturnDiscover.service
+cp /usr/local/bin/apt /mnt/usr/bin/apt
+
+arch-chroot /mnt /fast_install_stage2.sh
+
+echo
+echo "One Last Link"
+
+
+ln -sf /run/systemd/resolve/stub-resolv.conf /mnt/etc/resolv.conf
+
+
+reboot
+
+
+
+
+
diff --git a/versions/slave/airootfs/fast_install_stage2.sh b/versions/slave/airootfs/fast_install_stage2.sh
new file mode 100644
index 0000000..e0a0eca
--- /dev/null
+++ b/versions/slave/airootfs/fast_install_stage2.sh
@@ -0,0 +1,119 @@
+
+echo "Inside Chroot Env"
+echo
+source /environment.sh
+pwd
+echo
+echo ${PASSWORD}
+echo ${USER}
+echo ${DISK}
+
+
+echo "Installing Important Packages"
+
+mkinitcpio -P
+
+# Refresh the keyrings and package databases, then install the package set
+gpg --refresh-keys
+pacman-key --init && pacman-key --populate
+pacman-key --refresh-keys
+pacman -Syy
+
+pacman -S --noconfirm man-pages man-db dnsutils ethtool iputils net-tools iproute2 openssh wget \
+usbutils usb_modeswitch tcpdump smartmontools gnu-netcat mc dosfstools exfat-utils \
+partclone parted partimage gptfdisk iw dialog base-devel vim \
+grub os-prober efivar efibootmgr efitools intel-ucode amd-ucode dmidecode htop nano python slurm-llnl nfs-utils
+
+cd /usr/bin/
+ln -sf vim vi
+
+echo "Setup Timezone and Locale"
+
+ln -sf /usr/share/zoneinfo/Europe/Prague /etc/localtime
+
+hwclock --systohc
+
+echo "en_GB.UTF-8 UTF-8" >> /etc/locale.gen
+locale-gen
+echo "LANG=en_GB.UTF-8" >> /etc/locale.conf
+
+echo "Network Setup"
+
+dmidecode --string system-uuid | cut -c1-8 > /etc/hostname
+
+echo "127.0.0.1 localhost" > /etc/hosts
+echo "::1 localhost" >> /etc/hosts
+
+# for IPv6:
+# DHCP=yes
+
+echo "[Match]" > /etc/systemd/network/20-wired.network
+echo "Name=${NET}" >> /etc/systemd/network/20-wired.network
+echo "" >> /etc/systemd/network/20-wired.network
+echo "[Network]" >> /etc/systemd/network/20-wired.network
+echo "DHCP=ipv4" >> /etc/systemd/network/20-wired.network
+echo "" >> /etc/systemd/network/20-wired.network
+echo "[DHCPv6]" >> /etc/systemd/network/20-wired.network
+echo "UseDomains=true" >> /etc/systemd/network/20-wired.network
+
+echo "Adding Users"
+
+useradd -m -G wheel,users -s /bin/bash ${USER}
+yes ${PASSWORD} | passwd
+yes ${PASSWORD} | passwd ${USER}
+
+echo "Installing GRUB"
+
+mkdir -p /boot/grub
+grub-mkconfig -o /boot/grub/grub.cfg
+grub-install ${DISK}
+
+
+echo "Enabling services"
+systemctl enable systemd-networkd
+systemctl enable systemd-resolved
+systemctl enable systemd-timesyncd
+systemctl enable sshd
+
+timedatectl set-timezone Europe/Prague
+timedatectl set-ntp true
+
+echo "Generating issue"
+
+echo " ___ ,-\`." > /etc/issue
+echo " .-~~ ~~-.,-~ _~ " >> /etc/issue
+echo " #\` \`._- " >> /etc/issue
+echo " .\` _-~. " >> /etc/issue
+echo " | _- | " >> /etc/issue
+echo " \` _-~ ' " >> /etc/issue
+echo " . _-~\`. _-~ .' " >> /etc/issue
+echo " ,-' _,-~\`-__ __-' " >> /etc/issue
+echo ",.-~\` . ~~~ " >> /etc/issue
+echo " " >> /etc/issue
+echo " SaturnArch " >> /etc/issue
+echo >> /etc/issue
+echo "IP Addresses:" >> /etc/issue
+echo " \\4" >> /etc/issue
+echo " \\6" >> /etc/issue
+echo >> /etc/issue
+echo "Default login:" >> /etc/issue
+echo " user: ${USER}" >> /etc/issue
+echo " pass: ${PASSWORD}" >> /etc/issue
+echo >> /etc/issue
+
+# setup cluster
+
+sudo mkdir -p /clusterfs
+sudo chown nobody:nobody /clusterfs
+sudo chmod -R 777 /clusterfs
+
+systemctl enable munge
+systemctl enable slurmd
+systemctl enable slurmctld
+systemctl enable nfs-server
+systemctl enable saturnDiscover
+
+echo "Exiting Chroot Environment"
+
+exit
+
diff --git a/versions/slave/airootfs/saturnDiscover.service b/versions/slave/airootfs/saturnDiscover.service
new file mode 100644
index 0000000..04fd480
--- /dev/null
+++ b/versions/slave/airootfs/saturnDiscover.service
@@ -0,0 +1,16 @@
+[Unit]
+Description=SaturnArch discover Service
+# discover.py broadcasts at start-up, so wait until the network is configured.
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+Type=idle
+Restart=on-failure
+# Back off between restarts so a slow network does not exhaust the start limit.
+RestartSec=5
+User=root
+ExecStart=/usr/bin/python /usr/bin/discover.py
+
+[Install]
+WantedBy=multi-user.target
\ No newline at end of file
diff --git a/versions/slave/airootfs/slurm/cgroup.conf b/versions/slave/airootfs/slurm/cgroup.conf
new file mode 100644
index 0000000..3e3c63d
--- /dev/null
+++ b/versions/slave/airootfs/slurm/cgroup.conf
@@ -0,0 +1,14 @@
+CgroupMountpoint="/sys/fs/cgroup"
+CgroupAutomount=yes
+#CgroupReleaseAgentDir="/etc/slurm/cgroup"
+AllowedDevicesFile="/etc/slurm/cgroup_allowed_devices_file.conf"
+ConstrainCores=no
+#TaskAffinity=no
+ConstrainRAMSpace=yes
+ConstrainSwapSpace=no
+ConstrainDevices=no
+AllowedRamSpace=100
+AllowedSwapSpace=0
+MaxRAMPercent=100
+MaxSwapPercent=100
+MinRAMSpace=30
diff --git a/versions/slave/airootfs/slurm/cgroup_allowed_devices_file.conf b/versions/slave/airootfs/slurm/cgroup_allowed_devices_file.conf
new file mode 100644
index 0000000..e8681e1
--- /dev/null
+++ b/versions/slave/airootfs/slurm/cgroup_allowed_devices_file.conf
@@ -0,0 +1,7 @@
+/dev/null
+/dev/urandom
+/dev/zero
+/dev/sda*
+/dev/cpu/*/*
+/dev/pts/*
+/home/*
diff --git a/versions/slave/airootfs/slurm/slurm.conf.template b/versions/slave/airootfs/slurm/slurm.conf.template
new file mode 100644
index 0000000..25e9756
--- /dev/null
+++ b/versions/slave/airootfs/slurm/slurm.conf.template
@@ -0,0 +1,58 @@
+ClusterName=Betynda
+
+SlurmctldHost={%masterName%}({%masterIP%})
+
+ProctrackType=proctrack/linuxproc
+
+ReturnToService=2
+
+SlurmctldPidFile=/run/slurmctld.pid
+SlurmdPidFile=/run/slurmd.pid
+SlurmdSpoolDir=/var/lib/slurm/slurmd
+StateSaveLocation=/var/lib/slurm/slurmctld
+
+SlurmUser=slurm
+TaskPlugin=task/none
+
+SchedulerType=sched/backfill
+SelectType=select/cons_tres
+SelectTypeParameters=CR_Core
+
+AccountingStorageType=accounting_storage/none
+JobCompType=jobcomp/none
+JobAcctGatherType=jobacct_gather/none
+
+SlurmctldDebug=info
+SlurmctldLogFile=/var/log/slurm/slurmctld.log
+
+SlurmdDebug=info
+SlurmdLogFile=/var/log/slurm/slurmd.log
+
+{%hosts%}
+
+PartitionName=exp Nodes={%noMasterHosts%} Default=YES MaxTime=01:00:00 State=UP SuspendTime=3600 PriorityTier=100
+PartitionName=long Nodes={%noMasterHosts%} Default=NO MaxTime=168:00:00 State=UP SuspendTime=3600 PriorityTier=50
+PartitionName=debug Nodes=ALL Default=NO MaxTime=03:00:00 State=UP PriorityTier=150
+
+##
+## Power saving
+##
+
+# Timeout for powering a node on
+ResumeTimeout=600
+
+# Timeout for powering a node off
+SuspendTimeout=120
+
+# Power at most 1 node up and 1 node down per minute
+ResumeRate=1
+SuspendRate=1
+
+# Programs that power nodes on and off
+ResumeProgram=/usr/local/bin/slurmResume
+SuspendProgram=/usr/local/bin/slurmSuspend
+
+TreeWidth=1000
+
+# Wait for reserved nodes to power on (verify the option names against the slurm.conf docs)
+SchedulerParameters=salloc_wait_nodes,sbatch_wait_nodes
diff --git a/versions/slave/airootfs/slurmResume b/versions/slave/airootfs/slurmResume
new file mode 100755
index 0000000..07923b3
--- /dev/null
+++ b/versions/slave/airootfs/slurmResume
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+echo "`date` User $USER invoked Resume $*" >>/var/log/slurm/power_save.log
+
+sudo etherwake b0:83:fe:d8:a6:e0
diff --git a/versions/slave/airootfs/slurmSuspend b/versions/slave/airootfs/slurmSuspend
new file mode 100755
index 0000000..8d6d36a
--- /dev/null
+++ b/versions/slave/airootfs/slurmSuspend
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+echo "`date` User $USER invoked Suspend $*" >>/var/log/slurm/power_save.log
+
+sshpass -p 4126 ssh -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" -t lukasplevac@10.0.0.101 "sudo /sbin/shutdown"
diff --git a/versions/slave/profiledef.sh b/versions/slave/profiledef.sh
new file mode 100644
index 0000000..484052e
--- /dev/null
+++ b/versions/slave/profiledef.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# shellcheck disable=SC2034
+
+iso_name="SaturnArch"
+iso_label="SATURNARCH_SLAVE"
+iso_publisher="Lukas Plevac "
+iso_application="SaturnArch Linux"
+iso_version="slave"
+install_dir="arch"
+bootmodes=('bios.syslinux.mbr' 'bios.syslinux.eltorito' 'uefi-x64.systemd-boot.esp' 'uefi-x64.systemd-boot.eltorito')
+arch="x86_64"
+pacman_conf="pacman.conf"
+airootfs_image_tool_options=('-comp' 'xz' '-Xbcj' 'x86' '-b' '1M' '-Xdict-size' '1M')
+file_permissions=(
+  ["/etc/shadow"]="0:0:400"
+  ["/usr/local/bin/apt"]="1:1:1"
+  ["/root/fast_install.sh"]="1:1:1"
+  ["/root/fast_install_stage2.sh"]="1:1:1"
+)