Louis BECQUEY

Merge branch 'stage_aglae'

Showing 1 changed file with 121 additions and 3 deletions
...@@ -40,7 +40,8 @@ from Bio.SeqIO.FastaIO import FastaIterator, SimpleFastaParser ...@@ -40,7 +40,8 @@ from Bio.SeqIO.FastaIO import FastaIterator, SimpleFastaParser
40 from Bio.Seq import MutableSeq 40 from Bio.Seq import MutableSeq
41 from Bio.SeqRecord import SeqRecord 41 from Bio.SeqRecord import SeqRecord
42 from Bio.Align import MultipleSeqAlignment 42 from Bio.Align import MultipleSeqAlignment
43 - 43 +from collections import defaultdict
44 +from Bio.PDB.PDBIO import Select
44 runDir = os.getcwd() 45 runDir = os.getcwd()
45 46
46 def trace_unhandled_exceptions(func): 47 def trace_unhandled_exceptions(func):
...@@ -155,6 +156,123 @@ class SelectivePortionSelector(object): ...@@ -155,6 +156,123 @@ class SelectivePortionSelector(object):
155 return 1 156 return 1
156 157
157 158
159 +_select=Select()
160 +
161 +def save_mmcif(ioobj, out_file, select=_select, preserve_atom_numbering=False):
162 + if isinstance(out_file, str):
163 + fp = open(out_file, "w")
164 + close_file = True
165 + else:
166 + fp = out_file
167 + close_file = False
168 + #fp = open(out_file, "w")
169 + #close_file=True
170 + atom_dict = defaultdict(list)
171 +
172 + for model in ioobj.structure.get_list():
173 + if not select.accept_model(model):
174 + continue
175 + # mmCIF files with a single model have it specified as model 1
176 + if model.serial_num == 0:
177 + model_n = "1"
178 + else:
179 + model_n = str(model.serial_num)
180 + # This is used to write label_entity_id and label_asym_id and
181 + # increments from 1, changing with each molecule
182 + entity_id = 0
183 + if not preserve_atom_numbering:
184 + atom_number = 1
185 + for chain in model.get_list():
186 + if not select.accept_chain(chain):
187 + continue
188 + chain_id = chain.get_id()
189 + if chain_id == " ":
190 + chain_id = "."
191 + # This is used to write label_seq_id and increments from 1,
192 + # remaining blank for hetero residues
193 +
194 + prev_residue_type = ""
195 + prev_resname = ""
196 + for residue in chain.get_unpacked_list():
197 + if not select.accept_residue(residue):
198 + continue
199 + hetfield, resseq, icode = residue.get_id()
200 + if hetfield == " ":
201 + residue_type = "ATOM"
202 + label_seq_id = str(resseq)
203 +
204 + else:
205 + residue_type = "HETATM"
206 + label_seq_id = "."
207 + resseq = str(resseq)
208 + if icode == " ":
209 + icode = "?"
210 + resname = residue.get_resname()
211 + # Check if the molecule changes within the chain
212 + # This will always increment for the first residue in a
213 + # chain due to the starting values above
214 + if residue_type != prev_residue_type or (
215 + residue_type == "HETATM" and resname != prev_resname
216 + ):
217 + entity_id += 1
218 + prev_residue_type = residue_type
219 + prev_resname = resname
220 + label_asym_id = ioobj._get_label_asym_id(entity_id)
221 + for atom in residue.get_unpacked_list():
222 + if select.accept_atom(atom):
223 + atom_dict["_atom_site.group_PDB"].append(residue_type)
224 + if preserve_atom_numbering:
225 + atom_number = atom.get_serial_number()
226 + atom_dict["_atom_site.id"].append(str(atom_number))
227 + if not preserve_atom_numbering:
228 + atom_number += 1
229 + element = atom.element.strip()
230 + if element == "":
231 + element = "?"
232 + atom_dict["_atom_site.type_symbol"].append(element)
233 + atom_dict["_atom_site.label_atom_id"].append(
234 + atom.get_name().strip()
235 + )
236 + altloc = atom.get_altloc()
237 + if altloc == " ":
238 + altloc = "."
239 + atom_dict["_atom_site.label_alt_id"].append(altloc)
240 + atom_dict["_atom_site.label_comp_id"].append(
241 + resname.strip()
242 + )
243 + atom_dict["_atom_site.label_asym_id"].append(label_asym_id)
244 + # The entity ID should be the same for similar chains
245 + # However this is non-trivial to calculate so we write "?"
246 + atom_dict["_atom_site.label_entity_id"].append("?")
247 + atom_dict["_atom_site.label_seq_id"].append(label_seq_id)
248 + atom_dict["_atom_site.pdbx_PDB_ins_code"].append(icode)
249 + coord = atom.get_coord()
250 + atom_dict["_atom_site.Cartn_x"].append("%.3f" % coord[0])
251 + atom_dict["_atom_site.Cartn_y"].append("%.3f" % coord[1])
252 + atom_dict["_atom_site.Cartn_z"].append("%.3f" % coord[2])
253 + atom_dict["_atom_site.occupancy"].append(
254 + str(atom.get_occupancy())
255 + )
256 + atom_dict["_atom_site.B_iso_or_equiv"].append(
257 + str(atom.get_bfactor())
258 + )
259 + atom_dict["_atom_site.auth_seq_id"].append(resseq)
260 + atom_dict["_atom_site.auth_asym_id"].append(chain_id)
261 + atom_dict["_atom_site.pdbx_PDB_model_num"].append(model_n)
262 +
263 + # Data block name is the structure ID with special characters removed
264 + structure_id = ioobj.structure.id
265 + for c in ["#", "$", "'", '"', "[", "]", " ", "\t", "\n"]:
266 + structure_id = structure_id.replace(c, "")
267 + atom_dict["data_"] = structure_id
268 +
269 + # Set the dictionary and write out using the generic dictionary method
270 + ioobj.dic = atom_dict
271 + ioobj._save_dict(fp)
272 + if close_file:
273 + fp.close()
274 +
275 +
158 class Chain: 276 class Chain:
159 """ 277 """
160 The object which stores all our data and the methods to process it. 278 The object which stores all our data and the methods to process it.
...@@ -325,8 +443,8 @@ class Chain: ...@@ -325,8 +443,8 @@ class Chain:
325 # Save that renumbered selection on the mmCIF object s to file 443 # Save that renumbered selection on the mmCIF object s to file
326 ioobj = pdb.MMCIFIO() 444 ioobj = pdb.MMCIFIO()
327 ioobj.set_structure(t) 445 ioobj.set_structure(t)
328 - ioobj.save(self.file) 446 +
329 - 447 + save_mmcif(ioobj, self.file)
330 448
331 notify(status) 449 notify(status)
332 450
......