Showing
1 changed file
with
121 additions
and
3 deletions
... | @@ -40,7 +40,8 @@ from Bio.SeqIO.FastaIO import FastaIterator, SimpleFastaParser | ... | @@ -40,7 +40,8 @@ from Bio.SeqIO.FastaIO import FastaIterator, SimpleFastaParser |
40 | from Bio.Seq import MutableSeq | 40 | from Bio.Seq import MutableSeq |
41 | from Bio.SeqRecord import SeqRecord | 41 | from Bio.SeqRecord import SeqRecord |
42 | from Bio.Align import MultipleSeqAlignment | 42 | from Bio.Align import MultipleSeqAlignment |
43 | - | 43 | +from collections import defaultdict |
44 | +from Bio.PDB.PDBIO import Select | ||
44 | runDir = os.getcwd() | 45 | runDir = os.getcwd() |
45 | 46 | ||
46 | def trace_unhandled_exceptions(func): | 47 | def trace_unhandled_exceptions(func): |
... | @@ -155,6 +156,123 @@ class SelectivePortionSelector(object): | ... | @@ -155,6 +156,123 @@ class SelectivePortionSelector(object): |
155 | return 1 | 156 | return 1 |
156 | 157 | ||
157 | 158 | ||
159 | +_select=Select() | ||
160 | + | ||
161 | +def save_mmcif(ioobj, out_file, select=_select, preserve_atom_numbering=False): | ||
162 | + if isinstance(out_file, str): | ||
163 | + fp = open(out_file, "w") | ||
164 | + close_file = True | ||
165 | + else: | ||
166 | + fp = out_file | ||
167 | + close_file = False | ||
168 | + #fp = open(out_file, "w") | ||
169 | + #close_file=True | ||
170 | + atom_dict = defaultdict(list) | ||
171 | + | ||
172 | + for model in ioobj.structure.get_list(): | ||
173 | + if not select.accept_model(model): | ||
174 | + continue | ||
175 | + # mmCIF files with a single model have it specified as model 1 | ||
176 | + if model.serial_num == 0: | ||
177 | + model_n = "1" | ||
178 | + else: | ||
179 | + model_n = str(model.serial_num) | ||
180 | + # This is used to write label_entity_id and label_asym_id and | ||
181 | + # increments from 1, changing with each molecule | ||
182 | + entity_id = 0 | ||
183 | + if not preserve_atom_numbering: | ||
184 | + atom_number = 1 | ||
185 | + for chain in model.get_list(): | ||
186 | + if not select.accept_chain(chain): | ||
187 | + continue | ||
188 | + chain_id = chain.get_id() | ||
189 | + if chain_id == " ": | ||
190 | + chain_id = "." | ||
191 | + # This is used to write label_seq_id and increments from 1, | ||
192 | + # remaining blank for hetero residues | ||
193 | + | ||
194 | + prev_residue_type = "" | ||
195 | + prev_resname = "" | ||
196 | + for residue in chain.get_unpacked_list(): | ||
197 | + if not select.accept_residue(residue): | ||
198 | + continue | ||
199 | + hetfield, resseq, icode = residue.get_id() | ||
200 | + if hetfield == " ": | ||
201 | + residue_type = "ATOM" | ||
202 | + label_seq_id = str(resseq) | ||
203 | + | ||
204 | + else: | ||
205 | + residue_type = "HETATM" | ||
206 | + label_seq_id = "." | ||
207 | + resseq = str(resseq) | ||
208 | + if icode == " ": | ||
209 | + icode = "?" | ||
210 | + resname = residue.get_resname() | ||
211 | + # Check if the molecule changes within the chain | ||
212 | + # This will always increment for the first residue in a | ||
213 | + # chain due to the starting values above | ||
214 | + if residue_type != prev_residue_type or ( | ||
215 | + residue_type == "HETATM" and resname != prev_resname | ||
216 | + ): | ||
217 | + entity_id += 1 | ||
218 | + prev_residue_type = residue_type | ||
219 | + prev_resname = resname | ||
220 | + label_asym_id = ioobj._get_label_asym_id(entity_id) | ||
221 | + for atom in residue.get_unpacked_list(): | ||
222 | + if select.accept_atom(atom): | ||
223 | + atom_dict["_atom_site.group_PDB"].append(residue_type) | ||
224 | + if preserve_atom_numbering: | ||
225 | + atom_number = atom.get_serial_number() | ||
226 | + atom_dict["_atom_site.id"].append(str(atom_number)) | ||
227 | + if not preserve_atom_numbering: | ||
228 | + atom_number += 1 | ||
229 | + element = atom.element.strip() | ||
230 | + if element == "": | ||
231 | + element = "?" | ||
232 | + atom_dict["_atom_site.type_symbol"].append(element) | ||
233 | + atom_dict["_atom_site.label_atom_id"].append( | ||
234 | + atom.get_name().strip() | ||
235 | + ) | ||
236 | + altloc = atom.get_altloc() | ||
237 | + if altloc == " ": | ||
238 | + altloc = "." | ||
239 | + atom_dict["_atom_site.label_alt_id"].append(altloc) | ||
240 | + atom_dict["_atom_site.label_comp_id"].append( | ||
241 | + resname.strip() | ||
242 | + ) | ||
243 | + atom_dict["_atom_site.label_asym_id"].append(label_asym_id) | ||
244 | + # The entity ID should be the same for similar chains | ||
245 | + # However this is non-trivial to calculate so we write "?" | ||
246 | + atom_dict["_atom_site.label_entity_id"].append("?") | ||
247 | + atom_dict["_atom_site.label_seq_id"].append(label_seq_id) | ||
248 | + atom_dict["_atom_site.pdbx_PDB_ins_code"].append(icode) | ||
249 | + coord = atom.get_coord() | ||
250 | + atom_dict["_atom_site.Cartn_x"].append("%.3f" % coord[0]) | ||
251 | + atom_dict["_atom_site.Cartn_y"].append("%.3f" % coord[1]) | ||
252 | + atom_dict["_atom_site.Cartn_z"].append("%.3f" % coord[2]) | ||
253 | + atom_dict["_atom_site.occupancy"].append( | ||
254 | + str(atom.get_occupancy()) | ||
255 | + ) | ||
256 | + atom_dict["_atom_site.B_iso_or_equiv"].append( | ||
257 | + str(atom.get_bfactor()) | ||
258 | + ) | ||
259 | + atom_dict["_atom_site.auth_seq_id"].append(resseq) | ||
260 | + atom_dict["_atom_site.auth_asym_id"].append(chain_id) | ||
261 | + atom_dict["_atom_site.pdbx_PDB_model_num"].append(model_n) | ||
262 | + | ||
263 | + # Data block name is the structure ID with special characters removed | ||
264 | + structure_id = ioobj.structure.id | ||
265 | + for c in ["#", "$", "'", '"', "[", "]", " ", "\t", "\n"]: | ||
266 | + structure_id = structure_id.replace(c, "") | ||
267 | + atom_dict["data_"] = structure_id | ||
268 | + | ||
269 | + # Set the dictionary and write out using the generic dictionary method | ||
270 | + ioobj.dic = atom_dict | ||
271 | + ioobj._save_dict(fp) | ||
272 | + if close_file: | ||
273 | + fp.close() | ||
274 | + | ||
275 | + | ||
158 | class Chain: | 276 | class Chain: |
159 | """ | 277 | """ |
160 | The object which stores all our data and the methods to process it. | 278 | The object which stores all our data and the methods to process it. |
... | @@ -325,8 +443,8 @@ class Chain: | ... | @@ -325,8 +443,8 @@ class Chain: |
325 | # Save that renumbered selection on the mmCIF object s to file | 443 | # Save that renumbered selection on the mmCIF object s to file |
326 | ioobj = pdb.MMCIFIO() | 444 | ioobj = pdb.MMCIFIO() |
327 | ioobj.set_structure(t) | 445 | ioobj.set_structure(t) |
328 | - ioobj.save(self.file) | 446 | + |
329 | - | 447 | + save_mmcif(ioobj, self.file) |
330 | 448 | ||
331 | notify(status) | 449 | notify(status) |
332 | 450 | ... | ... |
-
Please register or login to post a comment