Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Will testing #13

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
544 changes: 297 additions & 247 deletions SBOL3_simple_library4.nt

Large diffs are not rendered by default.

Binary file modified SBOL3_simple_library4.xlsx
Binary file not shown.
685 changes: 685 additions & 0 deletions SampleTemp3Output.nt

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions excelutils/excel_sbol_utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import string
import rdflib
from openpyxl.worksheet import cell_range, worksheet
from openpyxl import load_workbook
from pathlib import Path

def check_name(nm_to_chck):
Expand All @@ -22,6 +23,11 @@ def check_name(nm_to_chck):
for ltr in nm_to_chck:
if ord(ltr) == 32:
nm_to_chck = nm_to_chck.replace(ltr, "_")
elif ord(ltr) == 45:
# Allow hyphens to be reinterpreted as underscores
nm_to_chck = nm_to_chck.replace(ltr, "_")
elif ord(ltr) == 46:
nm_to_chck = nm_to_chck.replace(ltr, "_")
elif ord(ltr) > 122 or ord(ltr) < 48:
# 122 is the highest decimal code number
# for common latin ltrs or arabic numbers
Expand Down Expand Up @@ -124,7 +130,7 @@ def read_variant_table(excel_file: Path) -> tuple[str, str, list[list]]:
LAST_VARIANT_ROW = 35

print(f'Loading workbook "{excel_file}"')
work_book = openpyxl.load_workbook(excel_file, data_only=True)
work_book = load_workbook(excel_file, data_only=True)
sheet = work_book[VARIANTS_SHEET]

# First, get the library name
Expand All @@ -133,7 +139,7 @@ def read_variant_table(excel_file: Path) -> tuple[str, str, list[list]]:

# Then get the base sequence
print('Extracting base sequence')
first_aa_column = get_column_number(FIRST_AMINO_ACID_COLUMN)
first_aa_column = col_to_num(FIRST_AMINO_ACID_COLUMN)
last_aa_column = row_ends(sheet, ORIGINAL_AMINO_ACID_ROW, first_aa_column)
# Get row from sheet and concatenate it into a string
row_iterator = sheet.iter_rows(min_row=ORIGINAL_AMINO_ACID_ROW, max_row=ORIGINAL_AMINO_ACID_ROW,
Expand Down
3 changes: 1 addition & 2 deletions excelutils/excel_sbol_utils/library2.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,7 @@ def subcomponents(rowobj):
template.assemblePrimaryStructure(comp_list)
#template.compile(assembly_method=None)

f#or comp in non_var_comps:

#or comp in non_var_comps:

rowobj.obj.masterTemplate = template
for var in variant_comps:
Expand Down
159 changes: 129 additions & 30 deletions excelutils/excel_sbol_utils/library3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import sbol3
import warnings
import excel_sbol_utils.helpers as helpers
import helpers as helpers


def objectType(rowobj):
Expand Down Expand Up @@ -34,6 +34,7 @@ def addToDescription(rowobj):
'different from': sbol3.SBOL_DIFFERENT_FROM,
'same orientation as': sbol3.SBOL_SAME_ORIENTATION_AS,
'different orientation from': sbol3.SBOL_SAME_ORIENTATION_AS}

def make_constraint(constraint, part_list, template):
m = constraint_pattern.match(constraint)
if not m:
Expand Down Expand Up @@ -63,16 +64,58 @@ def subcomponents(rowobj): #UPDATE TO WORK WITH CELL DICT, ALLOW CONSTRAINTS


if 'backbone' in rowobj.col_cell_dict:
temp = sbol3.Component(identity=f'{rowobj.obj.displayId}_ins_template', types=sbol3.SBO_DNA, name=f'{rowobj.obj.displayId}_ins_template')
newobj = sbol3.CombinatorialDerivation(identity=f'{rowobj.obj.displayId}_ins', template=temp, name=f'{rowobj.obj.displayId}_ins', strategy=sbol3.SBOL_ENUMERATE)
rowobj.doc.add(temp)
rowobj.doc.add(newobj)
rowobj.obj_dict[temp.display_id] = {'uri': temp.type_uri, 'object': temp,
'displayId': temp.display_id}
backbones = list(rowobj.col_cell_dict['backbone'].values())
back = True
oldobj = rowobj.obj
rowobj.obj = newobj
# If this row has a backbone, create a new combinatorial derivation

# Determine if there are multiple comps per part
multiple = False

for sub in rowobj.col_cell_dict['subcomp']:
if "," in rowobj.col_cell_dict['subcomp'][sub]:
multiple = True
break
else:
multiple = False

# 1. If there are multiple comps per part, create the _ins_template component

if multiple:
temp = sbol3.Component(identity=f'{rowobj.obj.displayId}_ins_template', types=sbol3.SBO_DNA)

newobj = sbol3.CombinatorialDerivation(identity=f'{rowobj.obj.displayId}_ins', template=temp, name=f'{rowobj.obj.name} insert', \
strategy=sbol3.SBOL_ENUMERATE, description=rowobj.obj.description)

rowobj.obj.description = None

rowobj.doc.add(temp) # Add the template
rowobj.doc.add(newobj) # Add the combdev _ins to the document connected to the template

rowobj.obj_dict[temp.display_id] = {'uri': temp.type_uri, 'object': temp,
'displayId': temp.display_id}
backbones = list(rowobj.col_cell_dict['backbone'].values())
backbones = backbones[0].split(", ")

back = True
oldobj = rowobj.obj
rowobj.obj = newobj
else:
# 2. Otherwise, create _ins without the template

# Create new component _ins without the template
newobj = sbol3.Component(identity=f'{rowobj.obj.displayId}_ins', name=f'{rowobj.obj.name} insert', \
description=rowobj.obj.description, types=sbol3.SBO_DNA, roles=sbol3.SO_ENGINEERED_REGION)

# Set description to None
rowobj.obj.description = None

rowobj.doc.add(newobj)

backbones = list(rowobj.col_cell_dict['backbone'].values())
backbones = backbones[0].split(", ")


back = True
oldobj = rowobj.obj
rowobj.obj = newobj
else:
back = False

Expand All @@ -89,19 +132,40 @@ def subcomponents(rowobj): #UPDATE TO WORK WITH CELL DICT, ALLOW CONSTRAINTS
variant_comps = []
comp_ind = 0

# Need to update for multiple backbones, as well as remove hardcoding
# Check SBOL Utilities for reasoning for multiple backbones

if back:
# Currently this code creates a template for the insertion of the backbone into the main combinatorialderivation
tempObj = rowobj.obj_dict[f'{oldobj.display_id}_template']['object']
sub = sbol3.LocalSubComponent(types=sbol3.SBO_DNA, name="Inserted construct")

sub = sbol3.LocalSubComponent(types=sbol3.SBO_DNA, name="Inserted Construct")
tempObj.features.append(sub)
backbone_sub = sbol3.VariableFeature(cardinality=sbol3.SBOL_ONE, variable=sub, variant_derivations=rowobj.obj)
oldobj.variable_features.append(backbone_sub)

subComp = sbol3.SubComponent(instance_of=rowobj.obj_dict[backbones[0]]['object'])
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].features.append(subComp)
constr1 = sbol3.Constraint(restriction=sbol3.SBOL_MEETS, object=subComp, subject=sub)
constr2 = sbol3.Constraint(restriction=sbol3.SBOL_MEETS, object=sub, subject=subComp)
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].constraints.append(constr1)
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].constraints.append(constr2)
if len(backbones) == 1:

subComp = sbol3.SubComponent(instance_of=rowobj.obj_dict[backbones[0]]['object'])
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].features.append(subComp)
constr1 = sbol3.Constraint(restriction=sbol3.SBOL_MEETS, object=subComp, subject=sub)
constr2 = sbol3.Constraint(restriction=sbol3.SBOL_MEETS, object=sub, subject=subComp)
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].constraints.append(constr1)
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].constraints.append(constr2)
else:

newLocalSub = sbol3.LocalSubComponent(name="Vector", types=sbol3.SBO_DNA)
tempObj.features.append(newLocalSub)

newVarFeature = sbol3.VariableFeature(variable=newLocalSub, variants=(rowobj.obj_dict[i]['object'] for i in backbones), cardinality=sbol3.SBOL_ONE)
oldobj.variable_features.append(newVarFeature)

constr1 = sbol3.Constraint(restriction=sbol3.SBOL_MEETS, object=newLocalSub, subject=sub)
constr2 = sbol3.Constraint(restriction=sbol3.SBOL_MEETS, object=sub, subject=newLocalSub)

rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].constraints.append(constr1)
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].constraints.append(constr2)


else:
temp = rowobj.obj_dict[f'{rowobj.obj.display_id}_template']['object']
Expand All @@ -124,7 +188,7 @@ def subcomponents(rowobj): #UPDATE TO WORK WITH CELL DICT, ALLOW CONSTRAINTS

comp_ind += 1
else:
tempSub = sbol3.SubComponent(name=f'Part {comp_ind}', instance_of=f'{rowobj.obj_dict[comp]["uri"]}', orientation=sbol3.SBOL_INLINE)
tempSub = sbol3.SubComponent(name=f'Part {comp_ind + 1}', instance_of=f'{rowobj.obj_dict[comp]["uri"]}', orientation=sbol3.SBOL_INLINE)
temp.features.append(tempSub)
variant_comps.append(tempSub)
if comp_ind != 0:
Expand Down Expand Up @@ -202,6 +266,34 @@ def dataSource(rowobj):
rowobj.obj.update_all_dependents(id_map) # this function doesn't yet do everything it should
rowobj.data_source_id_to_update[old_id] = new_identity

if pref == 'URL for GenBank file' or pref == 'URL for FASTA file':
# Namespace is everything except the last part of the url
# Loop backward through the value until a '/' is found
# Everything before the '/' is the namespace
old_val = val

# Loop through the string backwards
for i in range(len(val) - 1, 0, -1):
if val[i] == '/':
# Everything before the '/' is the namespace
ns = val[:i]

# Everything after the '/' is the display id
val = val[i+1:len(val) - 3]

break
old_id = rowobj.obj.identity
rowobj.doc.change_object_namespace([rowobj.obj], ns)
new_id = rowobj.obj.identity
rowobj.data_source_id_to_update[old_id] = new_id
rowobj.obj.derived_from = [old_val]
if val != rowobj.obj.display_id:
new_identity = str(rowobj.obj.identity).replace(rowobj.obj.display_id, helpers.check_name(val))
id_map = {rowobj.obj.identity:new_identity}
rowobj.obj.set_identity(new_identity)
rowobj.obj.update_all_dependents(id_map) # this function doesn't yet do everything it should
rowobj.data_source_id_to_update[old_id] = new_identity

def sequence(rowobj):
for col in rowobj.col_cell_dict.keys():
val = rowobj.col_cell_dict[col]
Expand All @@ -218,13 +310,13 @@ def sequence(rowobj):

# removes all whitespace (spaces, newlines) and the byte-order mark, then normalizes letter case
val = "".join(val.split())
val = val.replace(u"\ufeff", "").lower()
val = val.replace(u"\ufeff", "").upper()

# create sequence object
sequence = sbol3.Sequence(f"{rowobj.obj.displayId}_sequence",
elements=val)
if rowobj.obj.name is not None:
sequence.name = f"{rowobj.obj.name} Sequence"
sequence = sbol3.Sequence(f"{rowobj.obj.namespace}/{rowobj.obj.display_id}_sequence",
elements=val, encoding=sbol3.IUPAC_DNA_ENCODING, namespace=rowobj.obj.namespace)
# if rowobj.obj.name is not None:
# sequence.name = f"{rowobj.obj.name} Sequence"

rowobj.doc.add(sequence)

Expand All @@ -239,6 +331,11 @@ def sequence(rowobj):

def circular(rowobj):
    """Append the row's 'Circular' cell value to the object's types if absent.

    Reads ``rowobj.col_cell_dict['Circular']`` and adds it to
    ``rowobj.obj.types`` unless that value is already present.
    Returns None.

    NOTE(review): the original intent comment reads "if false add to linear
    collection if true add to types". Only the "add to types" part exists
    here, and the cell value is appended as-is without a true/false check.
    Presumably the cell already holds a type URI -- confirm against caller.
    """
    circular_value = rowobj.col_cell_dict['Circular']
    target = rowobj.obj
    if circular_value in target.types:
        # Already recorded; nothing to add.
        return
    target.types.append(circular_value)

def finalProduct(rowobj):
Expand All @@ -253,28 +350,30 @@ def finalProduct(rowobj):

sbol_objs = doc.objects
sbol_objs_names = [x.name for x in sbol_objs]
if 'FinalProducts' not in sbol_objs_names:
colec = sbol3.Collection('FinalProducts', name='FinalProducts')
if 'Final Products' not in sbol_objs_names:
colec = sbol3.Collection('FinalProducts', name='Final Products')
colec.description = 'Final products desired for actual fabrication'

sbol_objs = doc.objects
sbol_objs_names = [x.name for x in sbol_objs]

doc.add(colec)
colec.members.append(rowobj.obj_uri)
else:
colec = sbol_objs[sbol_objs_names.index('FinalProducts')]
colec = sbol_objs[sbol_objs_names.index('Final Products')]
colec.members.append(rowobj.obj_uri)

if 'LinearDNAProducts' not in sbol_objs_names:
colec = sbol3.Collection('LinearDNAProducts', name='LinearDNAProducts')
if 'Linear DNA Products' not in sbol_objs_names:
colec = sbol3.Collection('LinearDNAProducts', name='Linear DNA Products')
colec.description = 'Linear DNA constructs to be fabricated'

sbol_objs = doc.objects
sbol_objs_names = [x.name for x in sbol_objs]

doc.add(colec)
colec.members.append(rowobj.obj)
else:
colec = sbol_objs[sbol_objs_names.index('LinearDNAProducts')]
colec = sbol_objs[sbol_objs_names.index('Linear DNA Products')]
colec.members.append(rowobj.obj)


Expand Down
21 changes: 21 additions & 0 deletions excelutils/excel_sbol_utils/temp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import sbol2
import sbol3
import excel_sbol_utils.library2 as l2
import excel_sbol_utils.library3 as l3
import excel_sbol_utils.helpers as help
import excel2sbol.comp_column_functions2 as ccf
import excel2sbol.converter as conv
import os

# Scratch script: build a barebones SBOL2 combinatorial derivation object
# so that subcomponent building can be attempted on it.

# `sbol2.combinatorialderivation` names the submodule, which is not callable;
# the class to instantiate is `sbol2.CombinatorialDerivation`.
obj = sbol2.CombinatorialDerivation()

# Empty document, object lookup table, and sheet name set up for the
# experiment; nothing in this script uses them yet.
doc = sbol2.Document()
obj_dict = {}
sheet = 'Composite Parts'




Loading
Loading