diff --git a/airootfs/installFiles/discover.py b/airootfs/installFiles/discover.py deleted file mode 100644 index ce71f80..0000000 --- a/airootfs/installFiles/discover.py +++ /dev/null @@ -1,174 +0,0 @@ -import socket -import json -import os, sys -from random import randint -from time import sleep, time - -HOSTS = {} -UDP_IP = "255.255.255.255" -UDP_PORT = 5005 -PROT_HDR = "SATURNARCH " -SEND_TIME = None - -TYPE = "slave" - -if os.path.exists("/etc/slurm-llnl/MASTER"): - TYPE = "master" - -MASTER = None - -sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP) -sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1) -sock.bind((UDP_IP, UDP_PORT)) - -def nfsDone(): - if MASTER is None: - return False - - with open('/etc/fstab') as myfile: - if MASTER["ip"] in myfile.read(): - return True - - return False - -def get_ip(): - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - s.settimeout(0) - try: - # doesn't even have to be reachable - s.connect(('8.8.8.8', 1)) - IP = s.getsockname()[0] - except Exception: - IP = '127.0.0.1' - finally: - s.close() - return IP - -def selfInfo(): - return { - "ip": get_ip(), - "type": TYPE, - "name": socket.gethostname(), - "cpus": os.cpu_count(), - "rams": os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES') - } - - -def loadHosts(): - global HOSTS, MASTER - - try: - with open('/etc/slurm-llnl/hosts.json', 'r') as file: - HOSTS = json.load(file) - except: - HOSTS = {} - - if TYPE == "master": - MASTER = selfInfo() - else: - for host in HOSTS.values(): - if host["type"] == "master": - MASTER = host - -def updateHosts(): - with open("/etc/slurm-llnl/hosts.json", "w") as outfile: - json.dump(HOSTS, outfile) - -def generateSlurmConfig(source, target): - if MASTER is None: - return - - hosts = f"NodeName={socket.gethostname()} NodeAddr={get_ip()} CPUs={os.cpu_count()} State=UNKNOWN\n" # first is my self - noMasterHosts = "" - for host in HOSTS.values(): - hosts += f"NodeName={host["name"]} NodeAddr={host["ip"]} CPUs={host["cpus"]} State=UNKNOWN\n" - noMasterHosts += f"{host["name"]}, " - - if len(noMasterHosts) > 0: - noMasterHosts = noMasterHosts[:-2] - - with open(source) as f: - newText=f.read().replace('{%hosts%}', hosts).replace('{%noMasterHosts%}', noMasterHosts).replace('{%masterName%}', MASTER["name"]).replace('{%masterIP%}', MASTER["ip"]) - - with open(target, "w") as f: - f.write(newText) - -def generateHosts(target): - fileStr = """# Auto generated by SaturnArch -127.0.0.1\tlocalhost -::1\tlocalhost ip6-localhost ip6-loopback -ff02::1\tip6-allnodes -ff02::2\tip6-allrouters - -""" - fileStr += f"{get_ip()}\t{socket.gethostname()}\n" # first is my self - for host in HOSTS.values(): - fileStr += f"{host["ip"]}\t{host["name"]}\n" - - with open(target, "w") as outfile: - outfile.write(fileStr) - -def self_announcement(): - MESSAGE = (PROT_HDR + json.dumps(selfInfo())).encode("ASCII") - sock.sendto(MESSAGE, (UDP_IP, UDP_PORT)) - -## Start program -loadHosts() -self_announcement() - -while True: - if SEND_TIME is not None and SEND_TIME < int(time()): - print(f"Sending self announcement") - self_announcement() - SEND_TIME = None - sock.settimeout(None) - - data, addr = None, None - try: - data, addr = sock.recvfrom(1024) - data = data.decode("ASCII") - except socket.timeout: - continue - - if not data.startswith(PROT_HDR): - continue - - data = data[len(PROT_HDR):] # remove header - data = json.loads(data) - - if data["ip"] == get_ip(): - continue - - if data["ip"] in HOSTS and data == HOSTS[data["ip"]]: - continue - - print(f"Discover new HOST {data}") - - if data["type"] == "master": - MASTER = data - - HOSTS[data["ip"]] = data - updateHosts() - generateHosts("/etc/hosts") - generateSlurmConfig("/etc/slurm-llnl/slurm.conf.template", "/etc/slurm-llnl/slurm.conf") - - # configure network disks - if TYPE == "slave" and MASTER is not None and not nfsDone(): - os.system(f"echo \"{MASTER['ip']}:/clusterfs /clusterfs nfs defaults 0 0\" >> /etc/fstab") - os.system(f"echo \"{MASTER['ip']}:/home /home nfs defaults 0 0\" >> /etc/fstab") - os.system("mount -a") - - os.system("cp -f /clusterfs/munge.key /etc/munge/munge.key") - - # reset all services - os.system("systemctl restart munge") - os.system("systemctl restart slurmd") - - if TYPE == "master": - os.system("systemctl restart slurmctld") - - # plan next send - waitTime = randint(10,100) - print(f"Plan self announcement at T+{waitTime}s") - SEND_TIME = int(time()) + waitTime - sock.settimeout(waitTime / 2) \ No newline at end of file diff --git a/airootfs/installFiles/saturnDiscover.service b/airootfs/installFiles/saturnDiscover.service deleted file mode 100644 index ef2d89f..0000000 --- a/airootfs/installFiles/saturnDiscover.service +++ /dev/null @@ -1,15 +0,0 @@ -[Unit] -Description=SaturnArch discover Service -After=network.target - -[Service] -StandardError=journal -StandardOutput=journal -StandardInput=null -Type=idle -Restart=on-failure -User=root -ExecStart=/usr/bin/python /usr/bin/discover.py - -[Install] -WantedBy=multi-user.target \ No newline at end of file diff --git a/pacman.conf b/pacman.conf index 9d895e9..d4fee4d 100644 --- a/pacman.conf +++ b/pacman.conf @@ -100,3 +100,7 @@ Include = /etc/pacman.d/mirrorlist #[custom] #SigLevel = Optional TrustAll #Server = file:///home/custompkgs + +[saturn_repo] +SigLevel = Optional TrustAll +Server = https://git.plevac.eu/Betynda/SaturnArch-REPO/raw/branch/main/$arch diff --git a/versions/master/airootfs/fast_install.sh b/versions/master/airootfs/fast_install.sh index 2116982..022ad9e 100644 --- a/versions/master/airootfs/fast_install.sh +++ b/versions/master/airootfs/fast_install.sh @@ -112,9 +112,7 @@ mkdir /mnt/etc/slurm-llnl cp fast_install_stage2.sh /mnt cp environment.sh /mnt -cp /installFiles/discover.py /mnt/usr/bin/discover.py cp -rf /installFiles/slurm/* /mnt/etc/slurm-llnl -cp /installFiles/saturnDiscover.service /mnt/lib/systemd/system/saturnDiscover.service cp /usr/local/bin/apt /mnt/usr/bin/apt arch-chroot /mnt /fast_install_stage2.sh diff --git a/versions/master/airootfs/fast_install_stage2.sh b/versions/master/airootfs/fast_install_stage2.sh index 8f463eb..d16ada6 100644 --- a/versions/master/airootfs/fast_install_stage2.sh +++ b/versions/master/airootfs/fast_install_stage2.sh @@ -21,7 +21,7 @@ pacman -Syy pacman -S --noconfirm man-pages man-db dnsutils ethtool iputils net-tools iproute2 openssh wget \ usbutils usb_modeswitch tcpdump smartmontools gnu-netcat mc dosfstools exfat-utils \ partclone parted partimage gptfdisk iw dialog base-devel vim \ -grub os-prober efivar efibootmgr efitools intel-ucode amd-ucode dmidecode htop nano python slurm-llnl nfs-utils +grub os-prober efivar efibootmgr efitools intel-ucode amd-ucode dmidecode htop nano python slurm-llnl nfs-utils saturn-discover lmod python-pipenv cd /usr/bin/ ln -s vim vi @@ -110,8 +110,10 @@ sudo chmod -R 777 /clusterfs echo "/clusterfs (rw,sync,no_root_squash,no_subtree_check)" >> /etc/exports echo "/home (rw,sync,no_root_squash,no_subtree_check)" >> /etc/exports -# copy keys -cp /etc/munge/munge.key /clusterfs +# copy keys and config +mkdir /clusterfs/config +cp /etc/munge/munge.key /clusterfs/config +cp /etc/slurm-llnl/slurm.conf.template /clusterfs/config systemctl enable munge systemctl enable slurmd diff --git a/versions/slave/airootfs/fast_install.sh b/versions/slave/airootfs/fast_install.sh index 8b9b689..1c42a3e 100644 --- a/versions/slave/airootfs/fast_install.sh +++ b/versions/slave/airootfs/fast_install.sh @@ -112,9 +112,7 @@ mkdir /mnt/etc/slurm-llnl cp fast_install_stage2.sh /mnt cp environment.sh /mnt -cp /installFiles/discover.py /mnt/usr/bin/discover.py cp -rf /installFiles/slurm/* /mnt/etc/slurm-llnl -cp /installFiles/saturnDiscover.service /mnt/lib/systemd/system/saturnDiscover.service cp /usr/local/bin/apt /mnt/usr/bin/apt arch-chroot /mnt /fast_install_stage2.sh diff --git a/versions/slave/airootfs/fast_install_stage2.sh b/versions/slave/airootfs/fast_install_stage2.sh index d814dc7..8fb8b02 100644 --- a/versions/slave/airootfs/fast_install_stage2.sh +++ b/versions/slave/airootfs/fast_install_stage2.sh @@ -21,7 +21,7 @@ pacman -Syy pacman -S --noconfirm man-pages man-db dnsutils ethtool iputils net-tools iproute2 openssh wget \ usbutils usb_modeswitch tcpdump smartmontools gnu-netcat mc dosfstools exfat-utils \ partclone parted partimage gptfdisk iw dialog base-devel vim \ -grub os-prober efivar efibootmgr efitools intel-ucode amd-ucode dmidecode htop nano python slurm-llnl nfs-utils +grub os-prober efivar efibootmgr efitools intel-ucode amd-ucode dmidecode htop nano python slurm-llnl nfs-utils saturn-discover lmod python-pipenv cd /usr/bin/ ln -s vim vi