source: doi_checker.py @ 26:4af842589412

Revision 26:4af842589412, 2.5 KB checked in by hagenbruch@phoibe.ub.rub.de, 7 years ago (diff)

Added auto-suggest and linked data mashup; leafing through single hits is now independent of position in result list (i.e. an entry can be bookmarked as is); further improvements...

Line 
#!/usr/bin/env python
# encoding: utf-8

#  The MIT License
#
#  Copyright 2010 Andre Hagenbruch <andre.hagenbruch@ruhr-uni-bochum.de>.
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in
#  all copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#  THE SOFTWARE.
25
# Module metadata: author contact and the timestamp recorded for the module.
__author__ = "Andre Hagenbruch <andre.hagenbruch@ruhr-uni-bochum.de>"
__date__ = "$20.06.2010 16:46:32$"
28
import ast
import contextlib
import httplib
import logging
import urllib
32
# Configure the root logger when the module is loaded so that records of
# level DEBUG and above are emitted.
logging.basicConfig(level=logging.DEBUG)
34
def _fetch_result(params):
    """Query the Solr ``select`` handler and return the decoded response.

    ``params`` is an already URL-encoded parameter string.  It is handed to
    ``urllib.urlopen`` as its ``data`` argument, so the request is sent as a
    POST.  The string is expected to contain ``wt=python`` so that Solr
    answers with a Python literal.

    Returns the object described by the response body; the callers in this
    module index it like a dict.

    Raises ``ValueError`` or ``SyntaxError`` when the body is not a plain
    Python literal.  Errors raised by ``urllib`` propagate unchanged.
    """
    # closing() releases the connection even when read() raises.
    with contextlib.closing(
            urllib.urlopen('http://134.147.247.36:8983/solr/select?', params)
    ) as response:
        body = response.read()

    # SECURITY: this used to be eval(body), which executes whatever code the
    # remote end (or anything that can tamper with the plain-HTTP reply)
    # sends back.  ast.literal_eval() accepts only literals (dict, list,
    # str, numbers, True/False/None) and refuses everything else.
    # NOTE(review): a reply that contains a non-literal expression is now
    # rejected instead of evaluated -- confirm the index never produces one.
    return ast.literal_eval(body)
37
def check_doi(doi):
    """Log an error for every indexed record whose DOI does not resolve.

    Sends a ``HEAD`` request for ``doi`` to ``dx.doi.org``.  When the answer
    is anything other than status ``303``, the Solr index is searched for
    documents carrying that DOI and one ``logging.error`` line is written per
    document.  Each line shows the DOI, the title and either the capitalized
    ``fach`` value or, when that field is absent, the ``fakultaet`` value.

    Returns ``None``.  Network errors from ``httplib``/``urllib`` propagate.
    """
    # The quoting helpers below work on byte strings; anything that is not a
    # byte string already is encoded as UTF-8 first.
    # NOTE(review): assumes DOIs are stored unencoded in the index -- confirm.
    raw = doi if isinstance(doi, str) else doi.encode('utf-8')

    conn = httplib.HTTPConnection('dx.doi.org')
    try:
        # Percent-encode the DOI so characters such as '#', '<', '>' or a
        # space cannot corrupt the request line; '/' is left as is.
        conn.request('HEAD', '/' + urllib.quote(raw))
        status = conn.getresponse().status
    finally:
        # Always release the socket, even when the request fails.
        conn.close()

    # NOTE(review): only 303 is treated as "resolves"; confirm the resolver
    # never answers a valid DOI with a different redirect status.
    if status == 303:
        return

    # Escape backslash and double quote so the DOI stays inside the quoted
    # Solr phrase, then form-encode the whole q value: the parameter string
    # is sent as a form body, where a bare '&' or '+' would change its
    # meaning.
    phrase = raw.replace('\\', '\\\\').replace('"', '\\"')
    params = 'q=%s&fl=title+fach+fakultaet&wt=python' % urllib.quote_plus(
        'doi:"%s"' % phrase)

    for doc in _fetch_result(params)['response']['docs']:
        subject = doc.get('fach')
        if subject is not None:
            label = subject.capitalize()
        else:
            label = doc.get('fakultaet')
        logging.error('%s => %s (%s)', doi, doc.get('title'), label)
51
52
def retrieve_dois(limit=15000):
    """Return the facet listing of the ``doi`` field over all documents.

    ``limit`` is sent to Solr as ``facet.limit``, i.e. the maximum number of
    distinct DOIs to list.  The default keeps the value that used to be
    hard-coded, so existing calls without arguments behave as before.

    Returns whatever Solr stores under ``facet_counts/facet_fields/doi``.
    ``main`` consumes it as a flat list that alternates DOI and count.
    """
    # rows=0: only the facet listing is wanted, not the documents themselves.
    params = ('q=*:*&facet=true&facet.field=doi'
              '&facet.limit=%d&rows=0&wt=python' % limit)
    return _fetch_result(params)['facet_counts']['facet_fields']['doi']
57
def main():
    """Check every DOI listed in the index facet.

    The facet listing is consumed as a flat list that alternates DOI and
    count; only the DOIs are used.
    """
    # For a quick spot check of a single DOI, call for example
    # check_doi('10.3389/neuro.08.007.2007') or check_doi('10.1101/lm.6.2.138').

    # Step over the counts with a stride-2 slice instead of repeatedly
    # deleting the first two items, which is quadratic in the list length.
    for doi in retrieve_dois()[::2]:
        check_doi(doi)
66
# Run the check only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Note: See TracBrowser for help on using the repository browser.