// FileManagementSystem.h #ifndef dset__FileManagementSystem_H #define dset__FileManagementSystem_H // David Adams // November 2005 // // Entry point for accessing and storing physical and logical // files. // // Data // ---- // FMS objects (i.e. those of type FileManagementSystem) hold the // following data // 1. A list of file protocols that are allowed for output // 2. An ordered list of extended protocols // 3. A list of input replica file catalogs // 4. A filter command for mapping input URL's // 5. A storage element (SE) // 6. An output replica file catalog // 7. An SE alias // // Accessing files // --------------- // For file retrieval (get), an input URL is specified and and the FMS // attempts to use it to locate an acceptable output URL. This may // require staging and or copying the file. The protocol of the output // file is one of the acceptable ones and is chosen in accordance with // the replica order. This process is called URL resolution. // // Known protocols include // file, http, lfn, guid, gsiftp, srm, dcap, rfio, glite // and the acceptable protocols are typically specified by the // application that will consume the file. // // Extended protocols include the protocols themselves, the protocol // plus some or all of the following URL (e.g. http://www.mysite.org). // URL extensions are not yet fully supported. // // Extended protocols may also include aliases with other restrictions // or implications for file handling. At present, only the following // alias for file is suppported: // copy - copy of the file // (can be extended with copy://mydir/mysubdir) // Future additions might include local, nfs and afs. // // Resolution follows these steps: // 0. The acceptable and ordered extended protocols are combined to // form the final extended protocol list. // 1. If the input URL is logical (guid or lfn) each of the file // replica catalogs is called to obtain a list of replica URL's. // These lists are concatenated to produce an initial list of // candidate protocols. If the URL is is physical (not guid or lfn), // the list is formed with that single entry. // 2. Each of the candidate physical URL's is mapped to a new list of // physical URL's using the input URL filter. A new map of candidate // URL's is formed by concatenating these lists. If there is no // filter, then the original list is retained. // 3. For each of the final extended protocols and attempt is made // to match each of the candidate URL's. The match may be direct // (extended protocol string matches the beginning of the URL), // by copy (if the extended protocol is copy), or indirect, e.g. // by SRM mapping. The first extended protocol to have a candidate // match is used. If there are multiple matches, direct matches are // favored over indirect. // // Copying files // ------------- // A file copy may be requested by specifying an input URL and an // output destination (directory or filename). The algorithm is the // same as for get except that the destination URL replaces the // final extended protocol list. // // Storing files // ------------- // If an SE is defined, then the put command copies it to the SE // and, if an SE replica file catalog is additionally defined, registers // it in that catalog. // // An SE URL is generated when the copy is put into the SE. If an SE // alias name is defined, it is used to update the SE URL, i.e. // replace the SE string. If there is no catalog, the updated SE // URL is returned. Otherwise it is registered in the catalog // and a logical URL natural to the is generated. A GUID is returned // for pool file catalogs. // // Cache // ----- // The FMS maintains a cache recording the associations between // input and output URL's and an FMS get may use this cache to // avoid extracting files from the SE. #include #include #include #include "dataset_util/Environment.h" #include "dataset_util/Time.h" #include "dataset_file/FileCatalog.h" namespace dset { class FileManagementSystem { public: // typedefs // Names for protocols, etc. typedef std::string Name; // List of names. typedef std::vector NameList; // List of file catalog pointers. typedef std::vector FCList; // URL map. typedef std::map UrlMap; // Time map. typedef std::map TimeMap; public: // static methods // Create a simple FMS and set it as the default. // The resolve protocols are "file" and the SE is a local // directory. No replica catalogs are defined. // Returns 0 for success. static int create_simple_instance(); // Return the default FMS. static FileManagementSystem& default_instance(); private: // data mutable int m_err; int m_verbose; FCList m_fcs; NameList m_allowed; NameList m_order; NameList m_prots; Name m_filter; Url m_se; FCList m_sefcs; Name m_se_alias; UrlMap m_urls; TimeMap m_mods; private: // methods // Enter an association into the FMS cache. void cache_put(Url purl, Url lurl); // Retrieve the cached input for an output replica. Url cache_get(Url lurl); // Implementation of file get. Used by get and copy. Url internal_get(Url inurl, std::string prefix, const NameList& prots, bool make_copy); public: // methods // Default constructor. // Catalog is invalid. FileManagementSystem(); // Constructor from an environment. // File replica catalog list from DIAL_FMS_CATALOG // Allowed protocols from DIAL_FMS_PROTOCOLS // Protocol order from DIAL_FMS_PROTOCOL_ORDER // Input URL filter from DIAL_FMS_FILTER // Storage element from DIAL_FMS_SE // Storage replica catalog from DIAL_FMS_SE_CATALOG // Storage element alais from DIAL_FMS_SE_ALIAS // Verbose if DIAL_FMS_VERBOSE is not empty FileManagementSystem(const Environment& env); // Set this as the default FMS. // Previous default is destroyed. // Returns 0 for success. int set_default(); // Validity. bool is_valid() const; // Verbosity. // True if set verbose in ctor if the local file // debug_FileManagementSystem exists. bool verbose() const; // Error code. // From construction or the last get, put or register. int error() const; // Error message corresponding to the current error code. std::string error_message() const; // Error message with class name prepended. std::string long_error_message() const; // Return the list of replica catalogs. // The list is checked when files are retrieved. const FCList& file_catalogs() const; // Return the list of allowed protocols for retrival. const NameList& allowed_protocols() const; // String representation of the allowed protocols. std::string allowed_protocols_string() const; // Protocol order. const NameList& protocol_order() const; // Protocol order. std::string protocol_order_string() const; // Actual list of extended protocols combining the allowed and // ordered values. const NameList& protocols() const; // Return the input URL filter. Name input_filter() const; // Storage element. // some examples: // file:/topdir/subdir // file:///topdir/subdir // gsiftp://gsi.mysite.org/topdir/subdir // srm://srm.mysite.org/topdir/subdir Url storage_element() const; // Return the SE alias. Name se_alias() const; // Return the SE file catalog. // If not null, it is used to register files put into the SE. FileCatalog* se_file_catalog() const; // Retrieve a file corresponding to the given URL. // The retrieved file will be one of the allowed protocols selected // using the protocol order. Url get(Url inurl); // Copy a file to the specified directory URL. // The only supported directory protocol is file. // A copy suffix is appended to the name. // Returns nonblank for success. Url copy(Url inurl, Url outdir); // Put a file into the storage element. // The base file name (everything after the last path delimiter) remains // the same. // For success, the URL of the stored file is returned. Url put(Url inurl); // Release a replica. // Just a placeholder for now. // Returns 0 for success. int release(Url purl); // Display the cache. void display_cache(std::ostream& sout); }; } // end namespace dset // Output stream. std::ostream& operator<<(std::ostream& lhs, const dset::FileManagementSystem& rhs); #endif