python - How to speed up a .py script referring to millions of files? -


I have 2.5 million files that I want to process with a .py script.

I'm using a supercomputer, so the problem is not computing power; the problem is that a Python process is opened and closed for every file, and that is where the time is lost.

I'm using a loop to pass every file in the folder that I want to convert to the script. `${line}` refers to a line of a file in which every line names one of the files in the folder.

Is there a way to process all the files after opening the .py script once, instead of looping over the Python script?

Here is the loop code:

### loop ###
# One pythonsh interpreter is started per iteration; this per-file start-up
# cost is what the question is about.
# NOTE(review): ${ramdisk} and ${line} are presumably set elsewhere in the
# asker's script (not shown here) -- confirm.
while :
do
    pythonsh ${ramdisk}/script.py -l ${ramdisk}/${line}.pdb -U '' -A hydrogens
done

exit

The Python script is a tool for converting .pdb files to .pdbqt files; I found it in AutoDockTools, which comes with AutoDock4.

I modified the script, adding an -i command-line option.

This lets you specify a text file containing ligand filenames (one per line) and process all of them without restarting Python.

You should be able to call it as

# Convert every ligand listed (one filename per line) in list_of_files.txt
# within a single interpreter run. -U '' selects the empty cleanup type and
# -A hydrogens selects the "hydrogens" repair.
pythonsh ./newscript.py -i ./list_of_files.txt -U '' -A hydrogens

Note: untested! It should work as given, but be cautious!

#!/usr/bin/env python
#
# $Header: /opt/cvs/python/packages/share1.5/AutoDockTools/Utilities24/prepare_ligand4.py,v 1.5.4.1 2009/04/15 17:41:57 rhuey Exp $
#
# Modified 2016/02/07
# Hugh Bothwell  http://stackoverflow.com/users/33258
# Added -i commandline option to process multiple files
#
"""Prepare ligand files (.pdb, .mol2 or .pdbq) for AutoDock4 as .pdbqt.

Either one ligand (-l) or a text file naming one ligand per line (-i) is
processed inside a single interpreter run, so the interpreter start-up and
the MolKit / AutoDockTools imports are paid only once.

Example:
    pythonsh ./newscript.py -i ./list_of_files.txt -U '' -A hydrogens

The script sticks to syntax that is valid both in Python 2.5 and in later
versions (no print statement, no ``with``, no ``except ... as``), because
Python 2.5 does not support ``with`` and that is the interpreter the
original author targeted.
"""
import getopt
import os
import sys

from MolKit import Read
from AutoDockTools.MoleculePreparation import AD4LigandPreparation

# ---------------------------------------------------------------------------
# Command-line parameters.  They are module globals: main() fills them in and
# process_file() reads them.
# ---------------------------------------------------------------------------
#-l: ligand
ligand_filename = None
#-i: file containing ligand filenames, one per line
ligand_listfile = None
# optional parameters
verbose = None
add_bonds = False
#-A: repairs to make: add bonds and/or hydrogens
repairs = ""
#-C  default: add gasteiger charges
charges_to_add = 'gasteiger'
#-p preserve charges on specific atom types
preserve_charge_types = ''
#-U: cleanup by merging nphs_lps, nphs, lps
cleanup = "nphs_lps"
#-B named rotatable bond type(s) to allow to rotate
#allowed_bonds = ""
allowed_bonds = "backbone"
#-R  root
root = 'auto'
#-o outputfilename
outputfilename = None
#-F check_for_fragments
check_for_fragments = False
#-I bonds_to_inactivate
bonds_to_inactivate = ""
#-Z inactivate_all_torsions
inactivate_all_torsions = False
#-g attach_nonbonded_fragments
attach_nonbonded_fragments = False
#-M mode
mode = 'automatic'
#-d dictionary
# NOTE: shadows the builtin, but the name is kept because it is forwarded as
# the ``dict=`` keyword of AD4LigandPreparation below.
dict = None

# Option string for getopt: a trailing ':' marks an option that takes a value.
_OPTIONS = 'l:i:vo:d:A:Cp:U:B:R:MFI:Zgh'

_USAGE_LINES = [
    "Usage: prepare_ligand4.py -l filename",
    "",
    "    Description of command...",
    "         -l     ligand_filename       (.pdb or .mol2 or .pdbq format)",
    "         -i     list_of_filenames.txt (.pdb or .mol2 or .pdbq format)",
    "    Optional parameters:",
    "        [-v]    verbose output",
    "        [-o pdbqt_filename] (default output filename is ligand_filename_stem + .pdbqt)",
    "        [-d]    dictionary to write types list and number of active torsions ",
    "        [-A]    type(s) of repairs to make:\n\t\t bonds_hydrogens, bonds, hydrogens (default is to do no repairs)",
    "        [-C]    do not add charges (default is to add gasteiger charges)",
    "        [-p]    preserve input charges on atom type, eg -p Zn",
    "               (default is not to preserve charges on any specific atom type)",
    "        [-U]    cleanup type:\n\t\t nphs_lps, nphs, lps, '' (default is 'nphs_lps') ",
    "        [-B]    type(s) of bonds to allow to rotate ",
    "               (default sets 'backbone' rotatable and 'amide' + 'guanidinium' non-rotatable)",
    "        [-R]    index for root",
    "        [-F]    check for and use largest non-bonded fragment (default is not to do this)",
    "        [-M]    interactive (default is automatic output)",
    "        [-I]    string of bonds to inactivate composed of ",
    "                   of zero-based atom indices eg 5_13_2_10  ",
    "                   will inactivate atoms[5]-atoms[13] bond ",
    "                               and atoms[2]-atoms[10] bond ",
    "                      (default is not to inactivate any specific bonds)",
    "        [-Z]    inactivate all active torsions     ",
    "                      (default is leave all rotatable active except amide and guanidinium)",
    "        [-g]    attach all nonbonded fragments ",
    "                      (default is not to do this)",
]


def _say(*parts):
    """Write *parts* to stdout, space-separated, followed by a newline.

    Mirrors what a Python 2 ``print a, b`` statement emits while staying
    valid syntax on every Python version.
    """
    sys.stdout.write(' '.join([str(p) for p in parts]) + '\n')


def usage():
    "Print helpful, accurate usage statement to stdout."
    sys.stdout.write('\n'.join(_USAGE_LINES) + '\n')


def process_file(fname):
    """Read one ligand file and write its .pdbqt via AD4LigandPreparation.

    The preparation settings are taken from the module-level parameters
    above.  When the file holds several molecules, the one with the most
    atoms is used.

    Args:
        fname: path of the ligand file to read.

    Returns:
        ``mol.returnCode`` as left on the molecule after preparation
        (a non-zero value is treated as failure; its message is written to
        stderr).
    """
    mols = Read(fname)
    if verbose:
        _say('read ', fname)
    mol = mols[0]
    if len(mols) > 1:
        if verbose:
            _say("more than one molecule in file")
        # use the one molecule with the most atoms
        ctr = 1
        for m in mols[1:]:
            ctr += 1
            if len(m.allAtoms) > len(mol.allAtoms):
                mol = m
                if verbose:
                    _say("mol set to ", ctr, "th molecule with",
                         len(mol.allAtoms), "atoms")

    # Remember the input coordinates so that any change made during
    # preparation can be reported afterwards.
    coord_dict = {}
    for a in mol.allAtoms:
        coord_dict[a] = a.coords

    mol.buildBondsByDistance()

    # Charges of the atom types named with -p, to be restored after
    # preparation has assigned new charges.
    preserved = {}
    if charges_to_add is not None:
        for t in preserve_charge_types.split(','):
            if not t:
                continue
            ats = mol.allAtoms.get(lambda x: x.autodock_element == t)
            for a in ats:
                if a.chargeSet is not None:
                    preserved[a] = [a.chargeSet, a.charge]

    if verbose:
        _say("setting up LPO with mode=", mode,
             "and outputfilename= ", outputfilename)
        _say("and check_for_fragments=", check_for_fragments)
        _say("and bonds_to_inactivate=", bonds_to_inactivate)
    LPO = AD4LigandPreparation(mol, mode, repairs, charges_to_add,
                               cleanup, allowed_bonds, root,
                               outputfilename=outputfilename,
                               dict=dict,
                               check_for_fragments=check_for_fragments,
                               bonds_to_inactivate=bonds_to_inactivate,
                               inactivate_all_torsions=inactivate_all_torsions,
                               attach_nonbonded_fragments=attach_nonbonded_fragments)
    #do something about atoms with too many bonds (?)
    #FIX THIS: could be peptide ligand (???)
    #          ??use isPeptide to decide chargeSet??
    if charges_to_add is not None:
        # restore any previous charges
        for atom, chargeList in preserved.items():
            atom._charges[chargeList[0]] = chargeList[1]
            atom.chargeSet = chargeList[0]
    if verbose:
        _say("returning ", mol.returnCode)

    bad_list = []
    for a in mol.allAtoms:
        if a.coords != coord_dict[a]:
            bad_list.append(a)
    if bad_list:
        _say(len(bad_list), ' atom coordinates changed!')
        for a in bad_list:
            _say(a.name, ":", coord_dict[a], ' -> ', a.coords)
    elif verbose:
        _say("No change in atomic coordinates")

    if mol.returnCode != 0:
        sys.stderr.write(mol.returnMsg + "\n")
    # No sys.exit() here: in list mode the remaining files must still run.
    return mol.returnCode


def _process_list(listfile):
    """Run process_file() on every filename listed in *listfile*.

    Blank lines are skipped.  A ligand that raises is reported on stderr and
    the batch continues, so that one bad file does not abort the whole run.

    Returns:
        The number of ligands that raised or returned a non-zero code.
    """
    failures = 0
    inf = open(listfile)
    try:
        for line in inf:
            fname = line.rstrip()
            if not fname:
                continue
            try:
                code = process_file(fname)
            except Exception:
                # sys.exc_info() instead of ``except ... as`` keeps this
                # valid on Python 2.5.
                sys.stderr.write('prepare_ligand4: %s: %s\n'
                                 % (fname, sys.exc_info()[1]))
                code = 1
            if code != 0:
                failures += 1
    finally:
        inf.close()
    return failures


def main(argv=None):
    """Parse the command line, then process the ligand or the ligand list.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        The process exit status: 2 for a usage error, otherwise the ligand's
        return code (-l) or 1 if any listed ligand failed (-i).
    """
    global ligand_filename, ligand_listfile, verbose, repairs
    global charges_to_add, preserve_charge_types, cleanup, allowed_bonds
    global root, outputfilename, check_for_fragments, bonds_to_inactivate
    global inactivate_all_torsions, attach_nonbonded_fragments, mode, dict

    if argv is None:
        argv = sys.argv[1:]

    # process command arguments
    try:
        opt_list, args = getopt.getopt(argv, _OPTIONS)
    except getopt.GetoptError:
        _say('prepare_ligand4.py: %s' % (sys.exc_info()[1],))
        usage()
        return 2

    # Note: the "set ..." messages only appear for options that follow -v on
    # the command line, because verbose is switched on while iterating.
    for o, a in opt_list:
        if o == '-l':
            ligand_filename = a
            if verbose: _say('set ligand_filename to ', a)
        elif o == '-i':
            ligand_listfile = a
            if verbose: _say('set ligand_listfile to ', a)
        elif o == '-v':
            verbose = True
            _say('set verbose to ', True)
        elif o == '-o':
            outputfilename = a
            if verbose: _say('set outputfilename to ', a)
        elif o == '-d':
            dict = a
            if verbose: _say('set dict to ', a)
        elif o == '-A':
            repairs = a
            if verbose: _say('set repairs to ', a)
        elif o == '-C':
            charges_to_add = None
            if verbose: _say('do not add charges')
        elif o == '-p':
            # -p may be repeated; types accumulate as a comma-separated list
            preserve_charge_types += a
            preserve_charge_types += ','
            if verbose: _say('preserve initial charges on ', preserve_charge_types)
        elif o == '-U':
            cleanup = a
            if verbose: _say('set cleanup to merge ', a)
        elif o == '-B':
            allowed_bonds = a
            if verbose: _say('allow ', a, 'bonds set to rotate')
        elif o == '-R':
            root = a
            if verbose: _say('set root to ', root)
        elif o == '-F':
            check_for_fragments = True
            if verbose: _say('set check_for_fragments to True')
        elif o == '-M':
            # -M takes no value in the option string, so ``a`` is '' here.
            mode = a
            if verbose: _say('set mode to ', a)
        elif o == '-I':
            bonds_to_inactivate = a
            if verbose: _say('set bonds_to_inactivate to ', a)
        elif o == '-Z':
            inactivate_all_torsions = True
            if verbose: _say('set inactivate_all_torsions to ', inactivate_all_torsions)
        elif o == '-g':
            attach_nonbonded_fragments = True
            if verbose: _say('set attach_nonbonded_fragments to ', attach_nonbonded_fragments)
        elif o == '-h':
            usage()
            return 0

    if ligand_filename:
        return process_file(ligand_filename)

    if ligand_listfile:
        if outputfilename is not None:
            # A single -o name would be reused for every listed ligand, each
            # one overwriting the previous result.
            _say('prepare_ligand4: -o (output filename) cannot be combined with -i (ligand listfile).')
            usage()
            return 2
        if _process_list(ligand_listfile):
            return 1
        return 0

    _say('prepare_ligand4: either -l (ligand filename) or -i (ligand listfile) must be specified.')
    usage()
    return 2


if __name__ == '__main__':
    sys.exit(main())

# To execute this command type:
# prepare_ligand4.py -l pdb_file -v

Comments

Popular posts from this blog

javascript - jQuery: Add class depending on URL in the best way -

caching - How to check if a url path exists in the service worker cache -

Redirect to a HTTPS version using .htaccess -