source: stw2redis.py @ 38:8a694e2a6847

Revision 38:8a694e2a6847, 3.7 KB checked in by hagenbruch@phoibe.ub.rub.de, 6 years ago (diff)

Version 0.3

Line 
1#! /usr/bin/env python
2# encoding: utf-8
3
4#  The MIT License
5#
6#  Copyright 2011 hagenbruch.
7#
8#  Permission is hereby granted, free of charge, to any person obtaining a copy
9#  of this software and associated documentation files (the "Software"), to deal
10#  in the Software without restriction, including without limitation the rights
11#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12#  copies of the Software, and to permit persons to whom the Software is
13#  furnished to do so, subject to the following conditions:
14#
15#  The above copyright notice and this permission notice shall be included in
16#  all copies or substantial portions of the Software.
17#
18#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24#  THE SOFTWARE.
25
26__author__="hagenbruch"
27__date__ ="$16.05.2011 10:51:03$"
28
29import logging
30from lxml import etree
31import redis
32
# Log everything from DEBUG upwards with a human-readable timestamp.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)-4s %(message)s',
    datefmt='%a, %d %b %Y %H:%M:%S',
)

# Namespace URIs used by the STW RDF/XML file.
XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
RDF_NAMESPACE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
STW_NAMESPACE = 'http://zbw.eu/stw/'
SKOS_NAMESPACE = 'http://www.w3.org/2004/02/skos/core#'

# The same URIs wrapped in braces ("{uri}"), ready to be prepended to a
# local name when matching element tags and attribute names.
XML, RDF, STW, SKOS = (
    '{%s}' % uri
    for uri in (XML_NAMESPACE, RDF_NAMESPACE, STW_NAMESPACE, SKOS_NAMESPACE)
)

# Prefix -> namespace URI mapping.
NSMAMP = dict(
    xml=XML_NAMESPACE,
    rdf=RDF_NAMESPACE,
    stw=STW_NAMESPACE,
    skos=SKOS_NAMESPACE,
)
53
def stwparser(filename):
    """Parse an STW RDF/XML file into a nested dictionary.

    Every ``rdf:Description`` element whose ``rdf:about`` value does not
    start with the path segment ``thsys`` contributes one entry, keyed by
    the second ``/``-separated segment of that value.  The code assumes
    values of the form ``<type>/<id>``.

    Each entry is a dictionary that may contain:

    * ``broader``, ``narrower``, ``related``: lists with the second path
      segment of each ``rdf:resource`` target, excluding targets whose
      first segment is ``thsys``;
    * ``prefLabel_<lang>``, ``altLabel_<lang>``: lists of label texts,
      where ``<lang>`` is the element's ``xml:lang`` attribute.

    Descriptions without ``rdf:about`` are skipped.  Relation elements
    without ``rdf:resource``, and identifiers that contain no ``/``, are
    skipped as well instead of aborting the whole parse.

    :param filename: path (or other source accepted by ``etree.parse``)
        of the RDF/XML file.
    :returns: dictionary mapping identifier to the entry described above.
    """
    stw_dict = {}
    stw = etree.parse(filename)

    # Tag -> result key for predicates that point at another resource.
    relation_keys = {
        '%sbroader' % SKOS: 'broader',
        '%snarrower' % SKOS: 'narrower',
        '%srelated' % SKOS: 'related',
    }
    # Tag -> result key prefix for literal labels; the language is appended.
    label_keys = {
        '%sprefLabel' % SKOS: 'prefLabel',
        '%saltLabel' % SKOS: 'altLabel',
    }
    about_attr = '%sabout' % RDF
    resource_attr = '%sresource' % RDF
    lang_attr = '%slang' % XML

    for subject in stw.findall('.//%sDescription' % RDF):
        about = subject.attrib.get(about_attr)
        if about is None:
            continue
        about_parts = about.split('/')
        # Skip 'thsys' entries and anything without an id segment.
        if about_parts[0] == 'thsys' or len(about_parts) < 2:
            continue
        subject_id = about_parts[1]

        for predicate in subject:
            tag = predicate.tag
            if tag in relation_keys:
                resource = predicate.attrib.get(resource_attr)
                if resource is None:
                    # No rdf:resource on this element: nothing to link to.
                    continue
                resource_parts = resource.split('/')
                if resource_parts[0] == 'thsys' or len(resource_parts) < 2:
                    continue
                entry = stw_dict.setdefault(subject_id, {})
                entry.setdefault(relation_keys[tag], []).append(resource_parts[1])
            elif tag in label_keys:
                key = '%s_%s' % (label_keys[tag], predicate.attrib.get(lang_attr))
                entry = stw_dict.setdefault(subject_id, {})
                entry.setdefault(key, []).append(predicate.text)

    return stw_dict
85   
def main():
    """Reload Redis database 1 on localhost from ``stw/stw.rdf``.

    The database is flushed first.  Every entry returned by
    ``stwparser`` is then stored as one field of the ``stwid`` hash: the
    field name is the entry's identifier and the value is the string
    form of the entry's dictionary.
    """
    r = redis.Redis(host='localhost', port=6379, db=1)
    # Destructive: removes every key in the selected database.
    r.flushdb()

    stw_dict = stwparser('stw/stw.rdf')

    for stw_id, entry in stw_dict.items():
        # Newer redis-py releases refuse dict values (DataError), while
        # older ones stringified them implicitly.  Converting explicitly
        # keeps the stored format and works with both.
        r.hset('stwid', stw_id, str(entry))
94
# Run the import only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.