#!/usr/bin/python
'''
Copyright (c) 2008 Dorothea Salo
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
   derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

# Helpers for preparing DSpace batch-import item folders: personal-name
# parsing (Namelist, Name), "contents" file generation (Contents),
# dublin_core.xml generation (DC) and handle map files (Mapfile).
# The code is kept free of version-specific syntax so it runs unchanged on
# Python 2 and Python 3.

import os

try:
    # "io" is expected to resolve to the project's own helper module that
    # ships next to this script, not to the standard-library module.
    from io import read_file, write_file
except ImportError:
    # The standard-library "io" won the import and has no such helpers.
    # NOTE(review): these fallbacks assume read_file(path) returns the whole
    # file as text and write_file(path, text) overwrites the file -- confirm
    # against the project's io module.
    def read_file(path):
        '''Return the entire contents of the file at path.'''
        with open(path) as source:
            return source.read()

    def write_file(path, text):
        '''Create or overwrite the file at path with text.'''
        with open(path, "w") as target:
            target.write(text)


# Name suffixes recognised by both Namelist and Name.
_SUFFIXES = ["III", "Ph.D", "MS", "II", "Jr", "Jr.", "Sr", "Sr."]

# File names that never belong in a "contents" listing.
_SKIPPED_FILES = ("dublin_core.xml", "contents", "license.txt",
                  "license_rdf", "license_xml", "license_text")

# Opening line of the Dublin Core document; new values go right after it.
_DC_OPEN = "<dublin_core>\n"
_DC_CLOSE = "</dublin_core>"


class Namelist:
    '''Split a comma-separated list of author names.

    A segment that consists solely of a known suffix ("Jr.", "III", ...)
    is folded back into the name that precedes it, so
    "A Smith, Jr., B Jones" yields two names rather than three.
    '''

    suffixlist = list(_SUFFIXES)

    def __init__(self, str):
        # The parameter keeps its historical name (shadowing the builtin)
        # so keyword callers continue to work.
        self.str = str.strip()
        self.namelist = []
        for segment in str.split(","):
            token = segment.strip()
            if token in self.suffixlist and self.namelist:
                # Attach to the most recently collected name. This copes
                # with several suffixes in a row and with more than one
                # suffixed author, both of which used to raise IndexError.
                self.namelist[-1] = "%s, %s" % (self.namelist[-1], token)
            else:
                # Segments are stored exactly as they appeared between the
                # commas (surrounding whitespace included).
                self.namelist.append(segment)

    def get_namelist(self):
        '''Return the list of names found by the constructor.'''
        return self.namelist


class Contents:
    '''Makes DSpace "contents" files. HAS SIDE EFFECTS!

    Instantiating the class writes a file named "contents" into the item
    folder(s). With a true oneitem, folder itself is the item folder;
    otherwise every sub-directory of folder is treated as an item folder.
    '''

    def __init__(self, folder, oneitem=0):
        self.folder = folder
        if oneitem:
            self.process_folder(self.folder)
            return
        for item in sorted(os.listdir(self.folder)):
            itempath = os.path.join(folder, item)
            # Plain files in the batch folder (maplist.txt, .DS_Store, ...)
            # are not items; a "contents" file cannot be written into them.
            if os.path.isdir(itempath):
                self.process_folder(itempath)

    def process_folder(self, folder):
        '''Write folder/contents listing the item's files, one per line.

        Paths are relative to folder and may be nested. Hidden files,
        everything below a hidden directory, and DSpace's own metadata and
        license files are left out. Entries are sorted within each
        directory so the output is reproducible.
        '''
        root = os.path.normpath(folder)
        filelist = []
        for dirpath, dirs, files in os.walk(root):
            # Assigning to dirs[:] prunes the walk in place, so hidden
            # directories are skipped at every depth.
            dirs[:] = sorted(d for d in dirs if not d.startswith("."))
            for name in sorted(files):
                if name.startswith(".") or name in _SKIPPED_FILES:
                    continue
                # relpath gives the right answer for relative as well as
                # absolute folders, unlike trimming a common string prefix.
                filelist.append(
                    os.path.relpath(os.path.join(dirpath, name), root))
        write_file(os.path.join(folder, "contents"), "\n".join(filelist))


class DC:
    '''Dublin Core file writing.

    Builds the text of a DSpace dublin_core.xml document in self.S.
    '''

    def __init__(self):
        self.S = _DC_OPEN + _DC_CLOSE

    def addvalue(self, element, qualifier, value):
        '''Add one dcvalue entry and return the updated document text.

        The entry is inserted directly after the opening tag, so the most
        recently added value appears first in the document.
        NOTE(review): element, qualifier and value are inserted verbatim;
        callers must pass text that is already XML-safe.
        '''
        entry = '\t<dcvalue element="%s" qualifier="%s">%s</dcvalue>\n' % (
            element, qualifier, value)
        # count=1: only the real opening tag is touched, even if an earlier
        # value happens to contain the same text.
        self.S = self.S.replace(_DC_OPEN, _DC_OPEN + entry, 1)
        return self.S

    def writeDC(self, folder):
        '''Write the document to folder/dublin_core.xml.'''
        write_file(os.path.join(folder, "dublin_core.xml"), self.S)

    def returnDC(self):
        '''Return the document text built so far.'''
        return self.S


class Name:
    '''Split one personal name into last name, other names and suffix.

    Understands "Last, First", "First M. Last" and "First Last", each
    optionally followed by suffixes from suffixlist, with or without
    separating commas.

    Attributes set by the constructor:
      rawstring   the string exactly as passed in
      str         the stripped name with any suffixes removed
      suffix      space-separated suffixes ("" if none)
      is_divided  1 if the name was given as "Last, First", else 0
      lastname / othernames   the two parts of the name
    '''

    suffixlist = list(_SUFFIXES)

    def __init__(self, str):
        self.rawstring = str
        self.str = str.strip()
        self.suffix = ""
        self.is_divided = 0
        self.lastname = ""
        self.othernames = ""
        self._split_suffix()
        self._split_names()

    def _split_suffix(self):
        '''Move any suffixes from self.str into self.suffix.'''
        # If there is more than one comma, the last comma marks the suffix.
        if self.str.count(",") > 1:
            lastcomma = self.str.rfind(",")
            self.suffix = self.str[lastcomma + 1:].strip()
            self.str = self.str[:lastcomma]

        # Uncommaed suffixes left at the end ("John Doe III Ph.D") are
        # PREPENDED, in their original order, to the suffix found above.
        tokens = self.str.split()
        trailing = []
        # Always leave at least one token to serve as the name itself.
        while len(tokens) > 1 and tokens[-1] in self.suffixlist:
            trailing.insert(0, tokens.pop())
            # Kill any comma that separated the suffix from what precedes.
            tokens[-1] = tokens[-1].rstrip(",")
        if trailing:
            if self.suffix:
                trailing.append(self.suffix)
            self.suffix = " ".join(trailing)
            self.str = " ".join(tokens).strip()

    def _split_names(self):
        '''Fill lastname and othernames from the suffix-free self.str.'''
        text = self.str
        commaindex = text.find(",")
        dotindex = text.rfind(".")
        if commaindex != -1:
            # "Lastname, Firstnames" -- the comma belongs to neither part.
            self.is_divided = 1
            self.lastname = text[:commaindex].strip()
            self.othernames = text[commaindex + 1:].strip()
        elif dotindex != -1 and text[dotindex + 1:].strip():
            # Names with initials; covers "J. Doe" and "John Q. Public".
            # Only used when something follows the last dot, so the last
            # name can never come out empty.
            self.lastname = text[dotindex + 1:].strip()
            self.othernames = text[:dotindex + 1].strip()
        else:
            # Plain "First Last", or a single word (which becomes the
            # last name with no other names).
            head, _, tail = text.rpartition(" ")
            self.lastname = tail
            self.othernames = head.strip()

    def get_rawstring(self):
        '''Return the name exactly as it was passed to the constructor.'''
        return self.rawstring

    def get_lastname(self):
        '''Return the last name.'''
        return self.lastname

    def get_othernames(self):
        '''Return the first/middle names or initials ("" if none).'''
        return self.othernames

    def get_suffix(self):
        '''Return the suffix string ("" if none).'''
        return self.suffix

    def get_lastnamefirst(self):
        '''Return "Last, Others[, Suffix]", or just the last name when
        there are no other names.'''
        if not self.othernames:
            return self.lastname
        if self.suffix:
            return "%s, %s, %s" % (self.lastname, self.othernames,
                                   self.suffix)
        return "%s, %s" % (self.lastname, self.othernames)

    def get_fullname(self):
        '''Return "Others Last[, Suffix]", or just the last name when
        there are no other names.'''
        if not self.othernames:
            return self.lastname
        if self.suffix:
            return "%s %s, %s" % (self.othernames, self.lastname,
                                  self.suffix)
        return "%s %s" % (self.othernames, self.lastname)


class Mapfile:
    '''Collects the "handle" files of item folders into maplist.txt.'''

    def __init__(self, folder):
        self.folder = folder
        self.maplist = []

    def write_mapfile(self):
        '''Write folder/maplist.txt with one "<item> <handle>" line for
        every item sub-directory that contains a readable "handle" file.

        The list is rebuilt from scratch on every call, so calling this
        twice does not duplicate entries.
        '''
        self.maplist = []
        for item in sorted(os.listdir(self.folder)):
            itempath = os.path.join(self.folder, item)
            if not os.path.isdir(itempath):
                continue
            try:
                handle = read_file(os.path.join(itempath, "handle"))
            except (IOError, OSError):
                # No readable handle file: this item is simply left out.
                continue
            # The handle file normally ends with a newline already; add
            # one only if it is missing so entries never run together.
            if not handle.endswith("\n"):
                handle += "\n"
            self.maplist.append("%s %s" % (item, handle))
        write_file(os.path.join(self.folder, "maplist.txt"),
                   "".join(self.maplist))

    def get_maplist(self):
        '''Return the entries collected by the last write_mapfile call.'''
        return self.maplist