source: jcover_test.py @ 38:8a694e2a6847

Revision 38:8a694e2a6847, 1.2 KB checked in by hagenbruch@phoibe.ub.rub.de, 6 years ago (diff)

Version 0.3

Line 
1import logging
2
3logging.basicConfig(level=logging.DEBUG)
4
5import urllib
6import simplejson
7
8from os import listdir
9
def read_filenames(dir):
    """Return the entry names found in *dir* with their last extension removed.

    Only the final dot-separated suffix is stripped, so ``1234-5678.jpg``
    yields ``1234-5678`` and ``a.tar.gz`` yields ``a.tar``.  Names that
    contain no dot at all are returned unchanged.

    Args:
        dir: Path of the directory to list.  The parameter name shadows the
            builtin ``dir`` but is kept so existing callers keep working.

    Returns:
        A list of stems, in the order ``listdir`` reports the entries.
    """
    # rsplit with maxsplit=1 always yields at least one element, so taking
    # index 0 cannot fail.  The earlier ``stem, suffix = fn.split('.')``
    # raised ValueError for names with no dot or with more than one dot.
    return [fn.rsplit('.', 1)[0] for fn in listdir(dir)]
16
17#def read_issns(filename):
18#    issns = []
19#    data = open(filename, 'r').readlines()
20#
21#    for line in data:
22#        #issn, url = line.split(';')
23#        issns.append(line.strip())
24#    return issns
25
def read_issns(url='http://134.147.243.89:8990/solr/terms?terms.fl=issn&terms.limit=-1&omitHeader=true&wt=json&json.nl=map'):
    """Fetch the ``issn`` terms from a Solr terms endpoint.

    The JSON document served at *url* is parsed and the entries found under
    ``terms`` -> ``issn`` are returned.  The default URL asks Solr for
    ``json.nl=map``, so that value is presumably a mapping of ISSN to
    count and iterating it yields the ISSNs -- verify against the server.

    Args:
        url: Address of the terms request.  Defaults to the endpoint this
            script has always queried.

    Returns:
        A list with one element per entry under ``terms`` -> ``issn``.

    Raises:
        AttributeError: If the response has no ``terms`` key (``get``
            returns ``None``).
        TypeError: If ``terms`` has no ``issn`` key.
    """
    response = urllib.urlopen(url)
    try:
        solr_issns = simplejson.load(response)
    finally:
        # Release the HTTP connection even when the payload is not valid JSON.
        response.close()

    return list(solr_issns.get('terms').get('issn'))
34
def main():
    """Log each ISSN from read_issns() that has no matching name in the covers directory."""
    cover_dir = '/usr/local/nginx/html/media/bibliographie-02/covers/'
    cover_stems = set(read_filenames(cover_dir))
    indexed_issns = set(read_issns())

    # To report the ISSNs that DO have a cover instead, iterate over
    # set.intersection(cover_stems, indexed_issns).
    for issn in indexed_issns:
        if issn in cover_stems:
            continue
        logging.info(issn)


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.