python - How to speed up a .py script referring to millions of files? -
I have 2.5 million files to process with a .py script.
I'm using a supercomputer, so the problem is not computing power; the problem is that the Python process opens and closes every time, which loses time.
I'm using a loop over every file in the folder that I want to convert with the script. ${line} refers to a file name, with every line referring to one of the files in the folder.
Is there a way to process all the files after opening the .py script once, instead of looping over the Python script?
Here is the loop code:
# Shell loop from the question: it invokes pythonsh on script.py once per ${line}, so a new Python process is started for every .pdb file. NOTE(review): the loop header and its input source appear to have been lost in extraction - confirm against the original post.
### loop ### while : pythonsh ${ramdisk}/script.py -l ${ramdisk}/${line}.pdb -u '' -a hydrogens done exit
The Python script is a tool to convert .pdb files to .pdbqt files; I found it in AutoDockTools, which comes with AutoDock4.
I modified the script, adding an -i command-line option.
This lets you specify a text file containing ligand filenames (one per line) and process them all without restarting Python.
You should be able to call it as
pythonsh ./newscript.py -i ./list_of_files.txt -u '' -a hydrogens
Note: untested! It should work as given, but be cautious!
#!/usr/bin/env python # # # # $header: /opt/cvs/python/packages/share1.5/autodocktools/utilities24/prepare_ligand4.py,v 1.5.4.1 2009/04/15 17:41:57 rhuey exp $ # # modified 2016/02/07 # hugh bothwell http://stackoverflow.com/users/33258 # added -i commandline option process multiple files # import os molkit import read autodocktools.moleculepreparation import ad4ligandpreparation # initialize command-line parameters #-l: ligand ligand_filename = none #-i: file containing ligand-filenames ligand_listfile = none # optional parameters verbose = none add_bonds = false #-a: repairs make: add bonds and/or hydrogens repairs = "" #-c default: add gasteiger charges charges_to_add = 'gasteiger' #-p preserve charges on specific atom types preserve_charge_types='' #-u: cleanup merging nphs_lps, nphs, lps cleanup = "nphs_lps" #-b named rotatable bond type(s) allow rotate #allowed_bonds = "" allowed_bonds = "backbone" #-r root root = 'auto' #-o outputfilename outputfilename = none #-f check_for_fragments check_for_fragments = false #-i bonds_to_inactivate bonds_to_inactivate = "" #-z inactivate_all_torsions inactivate_all_torsions = false #-g attach_nonbonded_fragments attach_nonbonded_fragments = false #-m mode mode = 'automatic' #-d dictionary dict = none def process_file(fname): mols = read(fname) if verbose: print 'read ', fname mol = mols[0] if len(mols)>1: if verbose: print "more 1 molecule in file" #use 1 molecule atoms ctr = 1 m in mols[1:]: ctr += 1 if len(m.allatoms)>len(mol.allatoms): mol = m if verbose: print "mol set ", ctr, "th molecule with", len(mol.allatoms), "atoms" coord_dict = {} in mol.allatoms: coord_dict[a] = a.coords mol.buildbondsbydistance() if charges_to_add not none: preserved = {} preserved_types = preserve_charge_types.split(',') t in preserved_types: if not len(t): continue ats = mol.allatoms.get(lambda x: x.autodock_element==t) in ats: if a.chargeset not none: preserved[a] = [a.chargeset, a.charge] if verbose: print "setting lpo mode=", mode, 
print "and outputfilename= ", outputfilename print "and check_for_fragments=", check_for_fragments print "and bonds_to_inactivate=", bonds_to_inactivate lpo = ad4ligandpreparation(mol, mode, repairs, charges_to_add, cleanup, allowed_bonds, root, outputfilename=outputfilename, dict=dict, check_for_fragments=check_for_fragments, bonds_to_inactivate=bonds_to_inactivate, inactivate_all_torsions=inactivate_all_torsions, attach_nonbonded_fragments=attach_nonbonded_fragments) #do atoms many bonds (?) #fix this: peptide ligand (???) # ??use ispeptide decide chargeset?? if charges_to_add not none: #restore previous charges atom, chargelist in preserved.items(): atom._charges[chargelist[0]] = chargelist[1] atom.chargeset = chargelist[0] if verbose: print "returning ", mol.returncode bad_list = [] in mol.allatoms: if a.coords!=coord_dict[a]: bad_list.append(a) if len(bad_list): print len(bad_list), ' atom coordinates changed!' in bad_list: print a.name, ":", coord_dict[a], ' -> ', a.coords else: if verbose: print "no change in atomic coordinates" if mol.returncode != 0: sys.stderr.write(mol.returnmsg + "\n") # sys.exit(mol.returncode) if __name__ == '__main__': import sys import getopt def usage(): "print helpful, accurate usage statement stdout." print "usage: prepare_ligand4.py -l filename" print print " description of command..." 
print " -l ligand_filename (.pdb or .mol2 or .pdbq format)" print " -i list_of_filenames.txt (.pdb or .mol2 or .pdbq format)" print " optional parameters:" print " [-v] verbose output" print " [-o pdbqt_filename] (default output filename ligand_filename_stem + .pdbqt)" print " [-d] dictionary write types list , number of active torsions " print " [-a] type(s) of repairs make:\n\t\t bonds_hydrogens, bonds, hydrogens (default no repairs)" print " [-c] not add charges (default add gasteiger charges)" print " [-p] preserve input charges on atom type, eg -p zn" print " (default not preserve charges on specific atom type)" print " [-u] cleanup type:\n\t\t nphs_lps, nphs, lps, '' (default 'nphs_lps') " print " [-b] type(s) of bonds allow rotate " print " (default sets 'backbone' rotatable , 'amide' + 'guanidinium' non-rotatable)" print " [-r] index root" print " [-f] check , use largest non-bonded fragment (default not this)" print " [-m] interactive (default automatic output)" print " [-i] string of bonds inactivate composed of " print " of zero-based atom indices eg 5_13_2_10 " print " inactivate atoms[5]-atoms[13] bond " print " , atoms[2]-atoms[10] bond " print " (default not inactivate specific bonds)" print " [-z] inactivate active torsions " print " (default leave rotatable active except amide , guanidinium)" print " [-g] attach nonbonded fragments " print " (default not this)" # process command arguments try: opt_list, args = getopt.getopt(sys.argv[1:], 'l:i:vo:d:a:cp:u:b:r:mfi:zgh') except getopt.getopterror, msg: print 'prepare_ligand4.py: %s' %msg usage() sys.exit(2) #'l:vo:d:a:cku:b:r:mfi:zg' o, in opt_list: #print "o=", o, " a=", if o in ('-l', '--l'): ligand_filename = if verbose: print 'set ligand_filename ', if o in ('-i', '--i'): ligand_listfile = if verbose: print 'set ligand_listfile ', if o in ('-v', '--v'): verbose = true if verbose: print 'set verbose ', true if o in ('-o', '--o'): outputfilename = if verbose: print 'set outputfilename ', if o in 
('-d', '--d'): dict = if verbose: print 'set dict ', if o in ('-a', '--a'): repairs = if verbose: print 'set repairs ', if o in ('-c', '--c'): charges_to_add = none if verbose: print 'do not add charges' if o in ('-p', '--p'): preserve_charge_types+=a preserve_charge_types+=',' if verbose: print 'preserve initial charges on ', preserve_charge_types if o in ('-u', '--u'): cleanup = if verbose: print 'set cleanup merge ', if o in ('-b', '--b'): allowed_bonds = if verbose: print 'allow ', a, 'bonds set rotate' if o in ('-r', '--r'): root = if verbose: print 'set root ', root if o in ('-f', '--f'): check_for_fragments = true if verbose: print 'set check_for_fragments true' if o in ('-m', '--m'): mode = if verbose: print 'set mode ', if o in ('-i', '--i'): bonds_to_inactivate = if verbose: print 'set bonds_to_inactivate ', if o in ('-z', '--z'): inactivate_all_torsions = true if verbose: print 'set inactivate_all_torsions ', inactivate_all_torsions if o in ('-g', '--g'): attach_nonbonded_fragments = true if verbose: print 'set attach_nonbonded_fragments ', attach_nonbonded_fragments if o in ('-h', '--'): usage() sys.exit() if ligand_filename: process_file(ligand_filename) elif ligand_listfile: # python 2.5 not support `with` # open(ligand_listfile) inf: # fname in inf: # process_file(fname.rstrip()) inf = open(ligand_listfile) fname in inf: process_file(fname.rstrip()) inf.close() else: print 'prepare_ligand4: either -l (ligand filename) or -i (ligand listfile) must specified.' usage() sys.exit() # execute command type: # prepare_ligand4.py -l pdb_file -v
Comments
Post a Comment