Added basic molecule implementation

2025-04-05 07:11:32 +01:00 · 2023-11-04 20:25:19 +01:00 · 2023-11-04 20:25:19 +01:00 · d814152224
commit d814152224
parent 080b47092c
5 changed files with 330 additions and 0 deletions
--- a/exmaple.asm
+++ b/exmaple.asm
@ -0,0 +1,16 @@
+//
+// Example NOT program
+//
+// NOTE(review): the strands below are the same strings that src/SIMDDNA/register.py
+// passes to molecule.parse(); the section semantics are presumed - TODO confirm.
+//
+
+// presumably the initial register contents
+data:
+    //   ->
+    //   ||
+    // ---- 
+    <AB>[CD]
+
+// presumably one instruction per line, with ";" separating the strands of an instruction
+instructions:
+    // -----
+    // -----
+    {A*B*C*D*E*}; <ABCDE>
+    // --
+    {A*B*}
--- a/src/SIMDDNA/init.py
+++ b/src/SIMDDNA/init.py
@ -0,0 +1,2 @@
+import ascii as ascii
+import molecule as molecule
--- a/src/SIMDDNA/ascii.py
+++ b/src/SIMDDNA/ascii.py
@ -0,0 +1,101 @@
+from molecule import isComplementary
+from molecule import nothing
+
+def toBindingLen(chainID, curPOS, molecule):
+    """Return the signed distance from curPOS to the nearest bound base of a chain.
+
+    A base of chain `chainID` counts as bound when it is not `nothing` and is
+    complementary to the base of chain 0 at the same position.  Positions from
+    curPOS up to the end of the molecule are scanned first, then positions
+    from curPOS down to 0.
+
+    Returns:
+        0 or a positive offset when a bound base is found at or after curPOS,
+        a negative offset when one is found only before curPOS, and None when
+        neither scan finds a bound base.
+    """
+    def isBound(position):
+        candidate = molecule.getBase(chainID, position)
+        if candidate == nothing:
+            return False
+        return isComplementary(candidate, molecule.getBase(0, position))
+
+    # forward scan: curPOS .. last position
+    for offset, position in enumerate(range(curPOS, len(molecule))):
+        if isBound(position):
+            return offset
+
+    # backward scan: curPOS .. 0, reported as a negative offset
+    for offset, position in enumerate(range(curPOS, -1, -1)):
+        if isBound(position):
+            return -offset
+
+    return None
+
+def showMolecule(molecule, spacing = ""):
+    """Print an ASCII-art rendering of `molecule` to stdout.
+
+    Chain 0 is drawn as the bottom row.  Every other chain is drawn above it:
+    bases bound to chain 0 go on the two rows directly above, unbound bases go
+    on higher rows, one row per unit of distance to the chain's nearest bound
+    base (as reported by `toBindingLen`).
+
+    Args:
+        molecule: object providing `len()`, `getBase(chainID, pos)` and
+            `chainsCount()`.
+        spacing: string inserted between the characters of every printed row.
+
+    Returns nothing; warnings about unbound chains or ambiguous bindings are
+    printed inline.
+    """
+    # Rows are collected bottom-up and printed in reverse at the end.
+    Invlines = [
+        "", # register strand
+        "", # binding strand
+        "", # data strand
+    ]
+
+    for basePos in range(len(molecule)):
+            
+        # bottom row: "-" wherever chain 0 has a base
+        if molecule.getBase(0, basePos) == nothing:
+            Invlines[0] += " "
+        else:
+            Invlines[0] += "-"
+
+        # find bound bases
+        bounded = False
+        for chainID in range(1, molecule.chainsCount()):
+
+            # this chain has no base at this position
+            if molecule.getBase(chainID, basePos) == nothing:
+                continue
+
+            lenToBinding = toBindingLen(chainID, basePos, molecule)
+
+            if lenToBinding is None:
+                # the chain is not bound anywhere; nothing is drawn for this base
+                print(f"Warning: no binded for chain {chainID}")
+            elif lenToBinding == 0:
+                # this base itself is bound to chain 0
+                if bounded:
+                    print(f"Warning: two or more posible bindings on base {basePos}")
+                    
+                bounded     = True
+                    
+                Invlines[1] += "|"
+
+                # pick the glyph for the bound strand:
+                #   ">"  last position, or the chain has no base at the next position
+                #   "/"  the next base of the chain is not bound to chain 0
+                #   "\"  the previous base of the chain exists and is not bound
+                #   "-"  otherwise
+                if (basePos + 1 == len(molecule)) or (molecule.getBase(chainID, basePos + 1) == "-"):
+                    Invlines[2] += ">"
+                elif not isComplementary(molecule.getBase(chainID, basePos + 1), molecule.getBase(0, basePos + 1)):
+                    Invlines[2] += "/"
+                elif basePos > 0 and molecule.getBase(chainID, basePos - 1) != nothing and not isComplementary(molecule.getBase(chainID, basePos - 1), molecule.getBase(0, basePos - 1)):
+                    Invlines[2] += "\\"
+                else:
+                    Invlines[2] += "-"
+
+            elif lenToBinding > 0:
+                # unbound base whose nearest bound base lies ahead: drawn as "\"
+                # on row lenToBinding + 2
+
+                # make sure that row exists
+                for _ in range(lenToBinding - len(Invlines) + 3):
+                    Invlines.append("")
+
+                # left-pad the row with spaces up to the current column
+                for _ in range(basePos - len(Invlines[lenToBinding + 2])):
+                    Invlines[lenToBinding + 2] += " "
+
+                # a "/" already at this column becomes "X" (two strands crossing)
+                # NOTE(review): the replacement targets the last character of the
+                # row, not index basePos - confirm the two always coincide
+                if len(Invlines[lenToBinding + 2]) > basePos and (Invlines[lenToBinding + 2][basePos] == "/"):
+                    Invlines[lenToBinding + 2] = Invlines[lenToBinding + 2][:-1] + 'X'
+                else:
+                    Invlines[lenToBinding + 2] += "\\"
+
+            elif lenToBinding < 0:
+                # unbound base whose nearest bound base lies behind: mirror of the
+                # branch above, drawn as "/"
+                lenToBinding = abs(lenToBinding)
+
+                for _ in range(lenToBinding - len(Invlines) + 3):
+                    Invlines.append("")
+
+                for _ in range(basePos - len(Invlines[lenToBinding + 2])):
+                    Invlines[lenToBinding + 2] += " "
+
+                if len(Invlines[lenToBinding + 2]) > basePos and (Invlines[lenToBinding + 2][basePos] == "\\"):
+                    Invlines[lenToBinding + 2] = Invlines[lenToBinding + 2][:-1] + 'X'
+                else:
+                    Invlines[lenToBinding + 2] += "/"
+
+        # keep the two binding rows aligned with the bottom row
+        # NOTE(review): when two chains bind at the same position (warned above)
+        # both append to rows 1 and 2, so those rows grow by more than one column
+        if not bounded:
+            Invlines[1] += " "
+            Invlines[2] += " "
+
+    # highest row first, chain 0 last; `spacing` goes between the characters of a row
+    for line in reversed(Invlines):
+        print(spacing.join(line))
--- a/src/SIMDDNA/molecule.py
+++ b/src/SIMDDNA/molecule.py
@ -0,0 +1,172 @@
+#
+# [aa] - double strand of aa (domain a and domain a)
+# {aa} - single strand of aa (domain a and domain a) [upper]
+# <aa> - single strand of aa (domain a and domain a) [lower]
+# 
+# example:
+# <abc>[ABC]*{abc}<C>{ba}*[CC]*{CA}
+#
+#                          c
+#                  b    b        A
+#                    a        C
+#          A* B* C*     C* C*
+#          |  |  |      |  |
+# a  b  c  A  B  C   C  C  C
+#
+
+nothing = "-"
+
+def isComplementary(baseA, baseB):
+    """Return True when one base equals the other with a trailing "*" appended."""
+    starredA = f"{baseA}*"
+    if starredA == baseB:
+        return True
+
+    starredB = f"{baseB}*"
+    return baseA == starredB
+
+class Molecule:
+    """Position-aligned collection of base chains.
+
+    `chains` is a list of chains; each chain is a list of base strings in
+    which `nothing` marks an empty position.  A base may carry a trailing "*"
+    modifier, toggled through `charAddBase`.
+    """
+
+    def __init__(self):
+        self.chains = []
+
+    def __len__(self):
+        """Return the length of the longest chain (0 when there are no chains)."""
+        return self.maxChainSize()
+
+    def chainsCount(self):
+        """Return the number of chains."""
+        return len(self.chains)
+
+    def endPad(self):
+        """Append `nothing` to every chain until all reach the longest chain's length."""
+        targetSize = self.maxChainSize()
+
+        for chain in self.chains:
+            chain.extend([nothing] * (targetSize - len(chain)))
+
+    def addToChain(self, base, chainId):
+        """Append `base` to chain `chainId` (same effect as `addBase`, arguments swapped)."""
+        self.chains[chainId].append(base)
+
+    def maxChainSize(self):
+        """Return the length of the longest chain, or 0 when there are none."""
+        return max((len(chain) for chain in self.chains), default=0)
+
+    def padChain(self, chainID, count):
+        """Insert `count` empty positions at the front of chain `chainID`.
+
+        A zero or negative `count` leaves the chain unchanged.
+        """
+        # one slice assignment instead of `count` separate insert(0, ...) calls
+        self.chains[chainID][:0] = [nothing] * count
+
+    def padChainToLen(self, chainID, targetID):
+        """Front-pad chain `chainID` so it is at least as long as chain `targetID`."""
+        targetSize = len(self.chains[targetID])
+        curSize    = len(self.chains[chainID])
+
+        self.padChain(chainID, targetSize - curSize)
+
+    def padAllChains(self, length):
+        """Insert `length` empty positions at the front of every chain."""
+        for chainID in range(len(self.chains)):
+            self.padChain(chainID, length)
+
+    def addBase(self, chainID, base):
+        """Append `base` to the end of chain `chainID`."""
+        self.chains[chainID].append(base)
+
+    def addChain(self):
+        """Create a new empty chain and return its index."""
+        chainId = len(self.chains)
+        self.chains.append([])
+        return chainId
+
+    def updateBase(self, chainID, baseID, base):
+        """Overwrite the base at index `baseID` of chain `chainID`."""
+        self.chains[chainID][baseID] = base
+
+    def getBase(self, chainID, baseID):
+        """Return the base at index `baseID` of chain `chainID`."""
+        return self.chains[chainID][baseID]
+
+    def charAddBase(self, chainID, char, isBackward = False):
+        """Apply one notation character to chain `chainID`.
+
+        "*" toggles the trailing "*" modifier on the chain's last base; any
+        other character is appended as a new base.
+
+        Args:
+            chainID: index of the chain to modify.
+            char: a base name or the "*" modifier.
+            isBackward: when True, a new base takes over one leading empty
+                position of the chain instead of lengthening it; if the chain
+                has no leading empty position, every chain is front-padded by
+                one first.
+
+        Raises:
+            ValueError: if "*" is applied to a chain that has no base yet.
+        """
+        if char == "*":
+            # The modifier edits an existing base in place, so it must not
+            # trigger the backward shift: shifting here would move the chain
+            # a second time for every starred base and break the alignment.
+            chain = self.chains[chainID]
+            if not chain:
+                raise ValueError("'*' modifier has no base to apply to")
+
+            curBase = chain[-1]
+            if curBase.endswith("*"):
+                # strip only the modifier so multi-character base names survive
+                chain[-1] = curBase[:-1]
+            else:
+                chain[-1] = f"{curBase}*"
+            return
+
+        if isBackward:
+            if len(self.chains[chainID]) == 0 or self.getBase(chainID, 0) != nothing:
+                self.padAllChains(1)
+
+            # consume one leading empty slot to make room for the new base
+            del self.chains[chainID][0]
+
+        self.addBase(chainID, char)
+
+    def rawPrint(self):
+        """Print every chain on its own line, each base left-justified to width 3."""
+        for chain in self.chains:
+            print("".join(base.ljust(3, " ") for base in chain))
+
+def parse(notationStr):
+    """Build a Molecule from a strand-notation string.
+
+    The string is read character by character, ignoring whitespace:
+      <...>  bases appended to the lower chain (chain 0)
+      {...}  bases of an upper single strand, added with isBackward=True
+      [...]  double strand: each base goes to the lower chain and, with a "*"
+             appended, to an upper chain
+      .      between groups: the next group continues the most recently
+             created chain instead of starting a new one
+
+    Returns the Molecule after end-padding all chains to equal length.  On a
+    syntax error a message is printed and the process exits with status 1.
+
+    NOTE(review): input that ends inside an open group is not reported.
+    """
+    newMolecule  = Molecule()
+    state        = "init"                 # one of "init", "lower", "upper", "double"
+    lowerChain   = newMolecule.addChain() # always index 0
+    lastChain    = None                   # kind of the last opened group, checked by "."
+    isBackward   = False
+    workingChain = None                   # upper chain currently being filled, if any
+
+    for char in notationStr:
+        
+        # skip whitespace
+        if char.isspace():
+            continue
+
+        if state == "init":
+            # between groups: expect an opening bracket or the "." operator
+            
+            if   char == "<":
+                lastChain = state = "lower"
+            elif char == "[":
+                lastChain = state = "double"
+            elif char == "{":
+                lastChain = state = "upper"
+            elif char == ".":
+                if lastChain is not None and lastChain != "lower":
+                    # -1 addresses the last chain, so the next group appends to
+                    # it rather than creating a new chain
+                    workingChain = -1
+                else:
+                    print("lastchain is none or lower but have . (chain concatenation operation)")
+                    exit(1)
+            else:
+                print("Parsing error expecting <, [, { or . but have " + char)
+                exit(1)
+
+        elif state == "lower":
+            if char == ">":
+                state = "init"
+                continue
+
+            newMolecule.charAddBase(lowerChain, char)
+
+        elif state == "upper":
+            if char == "}":
+                workingChain = None
+                isBackward   = False
+                state        = "init"
+                continue
+
+            if workingChain is None:
+                # first character of a fresh upper strand: new chain, front-padded
+                # to the current length of the lower chain
+                isBackward   = True
+                workingChain = newMolecule.addChain()
+                newMolecule.padChainToLen(workingChain, lowerChain)
+
+            newMolecule.charAddBase(workingChain, char, isBackward)
+            
+        elif state == "double":
+            if char == "]":
+                workingChain = None
+                state        = "init"
+                continue
+
+            if workingChain is None:
+                # new upper chain starting at the current end of the lower chain
+                workingChain = newMolecule.addChain()
+                newMolecule.padChainToLen(workingChain, lowerChain)
+
+            # the same character goes to both strands ...
+            newMolecule.charAddBase(lowerChain,   char)
+            newMolecule.charAddBase(workingChain, char)
+
+            # ... and a freshly added upper base gets an extra "*" toggle
+            if char != "*":
+                newMolecule.charAddBase(workingChain, "*")
+
+    # make all chains the same length
+    newMolecule.endPad()
+
+    return newMolecule
--- a/src/SIMDDNA/register.py
+++ b/src/SIMDDNA/register.py
@ -0,0 +1,39 @@
+import molecule
+import ascii
+
+class Register:
+    """Holds a single molecule and renders it as ASCII art."""
+
+    def __init__(self, mol = None):
+        """Create a register around `mol`, or around a new empty Molecule when omitted."""
+        # Build the default per instance: a Molecule() written in the signature
+        # is evaluated once and would be shared by every Register.
+        if mol is None:
+            mol = molecule.Molecule()
+
+        self.set(mol)
+
+    def set(self, mol):
+        """Replace the molecule held by this register."""
+        self.mol = mol
+
+    def inscription(self, IMols):
+        """Placeholder that accepts `IMols` and does nothing (not implemented yet)."""
+        pass
+
+    def asciiShow(self, spaceing = ""):
+        """Print the held molecule via ascii.showMolecule.
+
+        `spaceing` is forwarded as the separator between printed characters.
+        Returns whatever ascii.showMolecule returns.
+        """
+        return ascii.showMolecule(self.mol, spaceing)
+
+
+# Demo script: runs at module level, so it also executes whenever this module is imported.
+print("---------------------------------")
+print("Before")
+print("---------------------------------\n")
+
+# create register
+myreg = Register(molecule.parse(
+    "<AB>[CD]"
+))
+
+myreg.asciiShow()
+
+print("\n---------------------------------")
+print("After")
+print("---------------------------------\n")
+
+# do inscription
+# (Register.inscription is an empty stub in this file, so the rendering below
+# shows the same molecule as the one above)
+myreg.inscription([
+    molecule.parse("{A*B*C*D*E*}"),
+    molecule.parse("<ABCDE>")
+])
+
+myreg.asciiShow()