algorithm - Extending Python's os.walk function on FTP server -
How can I make os.walk
traverse the directory tree of an FTP database (located on a remote server)? This is the way the code is structured now (comments provided):
import fnmatch
import ftplib
import os


def find(pattern, startdir=os.curdir):
    """Yield every path below *startdir* whose base name matches *pattern*.

    Both files and sub-directories are tested.  *pattern* is a shell-style
    wildcard as understood by ``fnmatch.fnmatch`` (for example ``'*.txt'``).

    Args:
        pattern: wildcard matched against each entry's name (not its path).
        startdir: directory at which the walk starts; defaults to the
            current directory.

    Yields:
        ``os.path.join(directory, name)`` for each matching entry, in the
        order ``os.walk`` reports the directories.
    """
    # os.walk produces one (directory, sub-directory names, file names)
    # triple per directory of the tree rooted at startdir.
    for thisdir, subshere, fileshere in os.walk(startdir):
        # Sub-directories and files are matched alike.
        for name in subshere + fileshere:
            if fnmatch.fnmatch(name, pattern):
                # A fresh full path is produced for every match.
                yield os.path.join(thisdir, name)


def findlist(pattern, startdir=os.curdir, dosort=False):
    """Return the matches of ``find(pattern, startdir)`` as a list.

    Args:
        pattern: wildcard passed through to ``find``.
        startdir: directory passed through to ``find``.
        dosort: when true, the list is sorted before it is returned.  It
            defaults to ``False``, so the sort only happens when the caller
            asks for it; otherwise the walk order is kept.

    Returns:
        A list of matching paths (empty when nothing matches).
    """
    matches = list(find(pattern, startdir))
    if dosort:
        matches.sort()
    return matches


# def ftp(  # specifying search.  (unfinished stub kept from the original)


if __name__ == '__main__':
    import sys

    # Usage: <script> PATTERN STARTDIR
    namepattern, startdir = sys.argv[1], sys.argv[2]
    for name in find(namepattern, startdir):
        print(name)
I am thinking that I need to define a new function (i.e., def ftp()
) to add this functionality to the code above. However, I am afraid that the os.walk
function will, by default, only walk the directory trees of the computer that the code is run from.
Is there a way that I can extend the functionality of os.walk
so that it is able to traverse a remote directory tree (via FTP)?
All you need is to utilize Python's ftplib
module. Since os.walk()
is based on a breadth-first search algorithm, you need to find the directories and file names at each iteration, and then continue traversing recursively from the first directory. I implemented this algorithm about 2 years ago to use as the heart of FTPwalker, which is an optimum package for traversing extremely large directory trees through FTP.
import posixpath
from os import path as ospath  # original alias, retained; walk() joins with posixpath


class ftpwalk:
    """Walk the directory tree of an FTP server over an open connection.

    The traversal is top-down: a directory is reported first and each of
    its sub-directories is then walked in turn, in listing order.

    Args:
        connection: an object offering ``cwd``, ``pwd`` and ``retrlines``
            in the way ``ftplib.FTP`` does.
    """

    def __init__(self, connection):
        self.connection = connection

    def listdir(self, _path):
        """Return the directory and file names found in *_path*.

        The connection's working directory is changed to *_path* and the
        server's ``LIST`` output is parsed.  An entry is a directory when
        its first field starts with ``'d'``; everything else is a
        non-directory.

        Args:
            _path: directory to change into and list.

        Returns:
            A ``(dirs, nondirs)`` pair of name lists.  ``([], [])`` is
            returned, after printing a diagnostic, when changing into
            *_path* fails.
        """
        try:
            self.connection.cwd(_path)
        except Exception as exp:
            # Best effort: an unreadable directory is reported and treated
            # as empty so that the rest of the walk can continue.
            print("the current path : ", self.connection.pwd(), str(exp), _path)
            return [], []

        file_list = []
        self.connection.retrlines('LIST', lambda line: file_list.append(line.split()))

        dirs, nondirs = [], []
        for info in file_list:
            if not info:
                # A blank listing line has no fields to inspect.
                continue
            ls_type = info[0]
            if ls_type.startswith('l') and '->' in info[1:]:
                # Symlink line "name -> target": keep the link's own name,
                # not the target it points at.
                name = info[info.index('->') - 1]
            else:
                name = info[-1]
            if name in ('.', '..'):
                # Descending into these would make walk() recurse forever.
                continue
            if ls_type.startswith('d'):
                dirs.append(name)
            else:
                nondirs.append(name)
        return dirs, nondirs

    def walk(self, path='/'):
        """Generate ``(path, dirs, nondirs)`` for *path* and all below it.

        Args:
            path: directory at which the walk starts; defaults to ``'/'``.

        Yields:
            One ``(path, dirs, nondirs)`` tuple per directory visited.  The
            ``dirs`` list is the one iterated afterwards, so removing names
            from it in place prunes those sub-trees from the walk.
        """
        dirs, nondirs = self.listdir(path)
        yield path, dirs, nondirs
        for name in dirs:
            # FTP paths are '/'-separated whatever the local platform is,
            # hence posixpath rather than os.path.
            yield from self.walk(posixpath.join(path, name))
            # In Python 2, which lacks "yield from", re-yield each item of
            # the nested generator from an explicit loop instead.
            self.connection.cwd('..')
Now, to use this class, you can simply create a connection object using the ftplib
module, pass that object to the ftpwalk
class, and just loop over the walk()
function:
In [2]: from test import ftpwalk

In [3]: import ftplib

In [4]: connection = ftplib.FTP("ftp.uniprot.org")

In [5]: connection.login()
Out[5]: '230 login successful.'

In [6]: ftpwalk = ftpwalk(connection)

In [7]: for i in ftpwalk.walk():
   ...:     print(i)
   ...:
('/', ['pub'], [])
('/pub', ['databases'], ['robots.txt'])
('/pub/databases', ['uniprot'], [])
('/pub/databases/uniprot', ['current_release', 'previous_releases'], ['license', 'current_release/readme', 'current_release/knowledgebase/complete', 'previous_releases/', 'current_release/relnotes.txt', 'current_release/uniref'])
('/pub/databases/uniprot/current_release', ['decoy', 'knowledgebase', 'rdf', 'uniparc', 'uniref'], ['readme', 'release.metalink', 'changes.html', 'news.html', 'relnotes.txt'])
...
...
...
Comments
Post a Comment