source: mods_parser.py @ 14:0e0f06219896

Revision 14:0e0f06219896, 4.3 KB checked in by hagenbruch@phoibe.ub.rub.de, 8 years ago (diff)

First version of organization RDF

Line 
1#!/usr/bin/env python
2# encoding: utf-8
3
4#  The MIT License
5#
6#  Copyright 2010 Andre Hagenbruch <andre.hagenbruch@ruhr-uni-bochum.de>.
7#
8#  Permission is hereby granted, free of charge, to any person obtaining a copy
9#  of this software and associated documentation files (the "Software"), to deal
10#  in the Software without restriction, including without limitation the rights
11#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12#  copies of the Software, and to permit persons to whom the Software is
13#  furnished to do so, subject to the following conditions:
14#
15#  The above copyright notice and this permission notice shall be included in
16#  all copies or substantial portions of the Software.
17#
18#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24#  THE SOFTWARE.
25
26__author__ = "Andre Hagenbruch <andre.hagenbruch@ruhr-uni-bochum.de>"
27__date__ = "$21.01.2010 18:24:20$"
28
29from MARCCodes import MARCRelators
30from MARCCodes import languageCodes
31from bibtex_util import umlaut
32from cgi import escape
33import datetime
34from dc_util import *
35from dcds_util import *
36from genre_maps import *
37from google_book_util import google_book
38from jcr import *
39from lxml import etree
40from lxml import objectify
41from mods_util import *
42import re
43import logging
44from sesame_util import *
45from solr_util import commit
46from solr_util import solr
47from solr_util import update
48from tictocs import *
49from worldcat_util import xisbn
50from xml_util import xml2cdata
51
52import Solr
53
# Configure the root logger so that DEBUG and more severe messages are emitted.
logging.basicConfig(level=logging.DEBUG)

# XML namespace URIs of the vocabularies this module works with.
MODS_NAMESPACE = 'http://www.loc.gov/mods/v3'
# The DCMI Terms namespace URI ends with a slash. Namespace URIs are compared
# character by character, so the slash-less form never matches real elements.
DCTERMS_NAMESPACE = 'http://purl.org/dc/terms/'
XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink'

# The same namespaces in '{uri}' form, ready to be prepended to a local name
# when building a qualified tag for lxml (e.g. MODS + 'mods').
MODS = '{%s}' % MODS_NAMESPACE
DCTERMS = '{%s}' % DCTERMS_NAMESPACE
XLINK = '{%s}' % XLINK_NAMESPACE

# Prefix -> namespace mapping; the None key makes MODS the default namespace.
NSMAP = {
    None: MODS_NAMESPACE,
    'dcterms': DCTERMS_NAMESPACE,
    'xlink': XLINK_NAMESPACE,
}
67
class Collection(object):
    '''A file of MODS records that is converted into Solr records.

    convert() parses the file, builds one Solr.SolrRecord per <mods>
    element and logs its serialized form.
    '''

    def __init__(self, filename, fach, ** args):
        '''Store the input file and the caller-supplied metadata.

        filename -- passed unchanged to etree.parse() by convert()
        fach -- stored on the instance; not read by any method of this class
        args -- optional keywords 'institution', 'inst_label' and 'debug',
                each defaulting to the empty string
        '''
        self.filename = filename
        self.fach = fach
        self.institution = args.get('institution', '')
        self.inst_label = args.get('inst_label', '')
        self.debug = args.get('debug', '')

    @staticmethod
    def _objectify_records(filename):
        '''Transform MODS records to objects.

        Returns a list with one lxml.objectify tree per <mods> element
        found in the file.
        '''
        tree = etree.parse(filename)
        # iter() also visits the root element, so a document whose root is a
        # single <mods> record is picked up as well as a wrapped collection.
        # Each record is serialized and re-parsed to obtain a stand-alone
        # objectify tree.
        return [objectify.fromstring(etree.tostring(record))
                for record in tree.iter('%smods' % MODS)]

    @staticmethod
    def _children(parent, tag):
        '''Return every child of parent named tag, or [] if there is none.

        Attribute access on an objectify element yields only the first
        matching child and raises AttributeError when there is none;
        iterating that child walks all siblings with the same tag.
        '''
        try:
            return list(getattr(parent, tag))
        except AttributeError:
            return []

    def convert(self):
        '''Build a Solr record for every MODS record and log it.

        Raises AttributeError if a record has no titleInfo/title, because
        the main title is treated as mandatory.
        '''
        records = Collection._objectify_records(self.filename)
        if self.debug:
            for mods in records:
                logging.debug(objectify.dump(mods))

        for mods in records:
            # Named 'doc' so the solr() helper imported from solr_util is not
            # shadowed inside this method.
            doc = Solr.SolrRecord()
            doc.title = mods.titleInfo.title
            try:
                doc.subtitle = mods.titleInfo.subTitle
            except AttributeError:
                pass

            # A record may carry several titleInfo elements; store the title
            # text of the typed ones, not the value of the type attribute.
            for info in Collection._children(mods, 'titleInfo'):
                title = getattr(info, 'title', None)
                if title is None:
                    continue
                kind = info.get('type', '')
                if kind == 'abbreviated':
                    doc.abbrtitle = title
                elif kind == 'translated':
                    doc.alternative = title

            # Unicode literal: under Python 2 a byte string never compares
            # equal to the unicode attribute value returned by lxml.
            for note in Collection._children(mods, 'note'):
                if note.get('displayLabel', '') == u'Titelzusätze':
                    doc.titelzusatz = note

            for name in Collection._children(mods, 'name'):
                if name.get('type') != 'personal':
                    continue
                try:
                    is_author = name.role.roleTerm == 'aut'
                except AttributeError:
                    # A name without role/roleTerm is not an author.
                    is_author = False
                if is_author:
                    logging.debug(name.namePart)
                    doc.creator.append(name.namePart)

            logging.info(doc.serialize().encode('utf8'))
123
def main():
    '''Script entry point; a placeholder that does nothing yet.'''
    return None


# Run main() only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Note: See TracBrowser for help on using the repository browser.