Source code for weblyzard_api.client.domain_specificity

'''
.. codeauthor:: Albert Weichselbraun <albert.weichselbraun@htwchur.ch>
'''
import unittest 

from eWRT.ws.rest import  MultiRESTClient
from weblyzard_api.client import WEBLYZARD_API_URL, WEBLYZARD_API_USER, WEBLYZARD_API_PASS

class DomainSpecificity(MultiRESTClient):
    '''
    **Domain Specificity Web Service**

    Determines whether documents are relevant for a given domain by
    searching for domain relevant terms in these documents.

    **Workflow**

    1. submit a domain-specificity profile with :func:`add_profile`
    2. obtain the domain-specificity of text documents with
       :func:`get_domain_specificity`, :func:`parse_documents` or
       :func:`search_documents`.
    '''
    # path of the service below the base URL
    URL_PATH = 'rest/domain_specificity'

    def __init__(self, url=WEBLYZARD_API_URL, usr=WEBLYZARD_API_USER,
                 pwd=WEBLYZARD_API_PASS):
        '''
        :param url: URL of the domain specificity web service
        :param usr: optional user name
        :param pwd: optional password
        '''
        MultiRESTClient.__init__(self, service_urls=url, user=usr,
                                 password=pwd)

    def add_profile(self, profile_name, profile_mapping):
        '''
        Adds a domain-specificity profile to the Web service.

        :param profile_name: the name of the domain specificity profile
        :param profile_mapping: a dictionary of keywords and their
            respective domain specificity values.
        :returns: the result of the ``add_or_refresh_profile`` request.
        '''
        # execute_all_services=True is passed so that the profile is
        # submitted with that flag set; its exact semantics are defined by
        # MultiRESTClient.request (presumably: send to every configured
        # service URL -- confirm against eWRT).
        return self.request('add_or_refresh_profile/%s' % profile_name,
                            profile_mapping,
                            execute_all_services=True)

    def get_domain_specificity(self, profile_name, documents,
                               is_case_sensitive=True):
        '''
        Sends the documents to the ``parse_documents`` endpoint of the
        given profile in a single request.

        :param profile_name: the name of the domain specificity profile to
            use.
        :param documents: a list of dictionaries containing the document
        :param is_case_sensitive: whether to consider case or not
            (default: True)
        :returns: the result of the ``parse_documents`` request.
        '''
        # the boolean is interpolated with %s, i.e. as 'True' / 'False'
        path = 'parse_documents/%s/%s' % (profile_name, is_case_sensitive)
        return self.request(path, documents)

    def _bucketize(self, l, n):
        '''
        Splits a sequence into consecutive slices of at most ``n`` items.

        :param l: the sequence to split (must support ``len`` and slicing)
        :param n: the maximum size of each slice
        :returns: a generator yielding the slices in order.
        '''
        # `range` instead of the Python-2-only `xrange`: same slices, and
        # the helper also works on Python 3.
        return (l[i:i + n] for i in range(0, len(l), n))

    def parse_documents(self, matview_name, documents,
                        is_case_sensitive=False):
        '''
        Sends the documents one by one to the ``parse_documents`` endpoint
        and merges the per-document results.

        :param matview_name: a comma separated list of matview_names to
            check for domain specificity.
        :param documents: a list of dictionaries containing the document
        :param is_case_sensitive: case sensitive or not
        :returns: dict (profilename: (content_id, dom_spec))
        '''
        # the request path does not depend on the batch -> build it once
        path = 'parse_documents/%s/%s' % (matview_name, is_case_sensitive)
        found_tags = {}
        # NOTE(review): get_document_batch is not defined in this class;
        # presumably it is inherited from MultiRESTClient -- confirm.
        for document_batch in self.get_document_batch(documents):
            # buckets of size 1: every document is sent in its own request
            for batch in self._bucketize(document_batch, 1):
                result = self.request(path, batch)
                if result:
                    # NOTE(review): the result is indexed with the complete
                    # matview_name string; verify that this also holds when
                    # a comma separated list of names is passed.
                    found_tags.update(result[matview_name])
        return found_tags

    def search_documents(self, profile_name, documents,
                         is_case_sensitive=False):
        '''
        Sends the documents to the ``search_documents`` endpoint of the
        given profile.

        :param profile_name: the name of the domain specificity profile to
            use.
        :param documents: the documents to submit (presumably a list of
            dictionaries as for :func:`get_domain_specificity` -- confirm).
        :param is_case_sensitive: case sensitive or not (default: False)
        :returns: the result of the ``search_documents`` request.
        '''
        path = 'search_documents/%s/%s' % (profile_name, is_case_sensitive)
        return self.request(path, documents)

    def list_profiles(self):
        '''
        :returns: a list of all available domain specificity profiles.
        '''
        return self.request('list_profiles')

    def has_profile(self, profile_name):
        '''
        Returns whether the given profile exists on the server.

        :param profile_name: the name of the domain specificity profile to
            check.
        :returns: ``True`` if the given profile exists on the server.
        '''
        return profile_name in self.list_profiles()

    def meminfo(self):
        '''
        :returns: Information on the web service's memory consumption
        '''
        return self.request('meminfo')
class TestDomainSpecificity(unittest.TestCase):
    '''
    Integration tests for :class:`DomainSpecificity`.

    The tests talk to the real web service; when it is not reachable they
    return early instead of failing.
    '''

    def setUp(self):
        '''
        Creates the client and records whether the web service is online.
        '''
        self.client = DomainSpecificity()
        # The original queried `self.jesaja`, an attribute that is never
        # assigned in this test case (AttributeError in every setUp); the
        # client under test is `self.client`.
        self.service_is_online = self.client.is_online()
        if not self.service_is_online:
            # single-argument print(...) prints the same on Python 2 and 3
            print('WARNING: Webservice is offline --> not executing all tests!!')

    def test_domain_specificity(self):
        '''
        Uploads two profiles, checks profile lookup and queries the
        domain specificity of the test documents.
        '''
        if not self.service_is_online:
            return

        PROFILE_NAME1 = 'test1'
        PROFILE_MAPPING1 = {'ana': 0.8, 'daniela': 0.7, 'markus': 0.2}
        PROFILE_NAME2 = 'test2'
        PROFILE_MAPPING2 = {'jasna': 1.0, 'pool': 0.1, 'ana': -0.2}

        self.client.add_profile(PROFILE_NAME1, PROFILE_MAPPING1)
        self.client.add_profile(PROFILE_NAME2, PROFILE_MAPPING2)

        # assert* methods instead of bare `assert`, which is stripped
        # when Python runs with -O
        self.assertTrue(self.client.has_profile(PROFILE_NAME1))
        self.assertFalse(self.client.has_profile('unknown'))

        self._test_domain_specifcity(PROFILE_NAME1)

    def _test_domain_specifcity(self, domain_specificity_profile):
        '''
        Prints the domain specificity of two test documents, once with the
        default case handling and once case-insensitively.

        :param domain_specificity_profile: name of the profile to query.
        '''
        if not self.service_is_online:
            return

        TEST_DOCUMENT_LIST = [
            {'content_id': '1',
             'title': 'Ana loves Daniel.',
             'content': 'ana knowns daniela and gerhard.'},
            {'content_id': '2',
             'title': 'Jasna knows Ana.',
             'content': 'I have met jasna at the pool.'},
        ]

        print(self.client.get_domain_specificity(domain_specificity_profile,
                                                 TEST_DOCUMENT_LIST))
        print(self.client.get_domain_specificity(domain_specificity_profile,
                                                 TEST_DOCUMENT_LIST,
                                                 is_case_sensitive=False))

    def test_meminfo(self):
        '''
        Prints the memory information reported by the web service.
        '''
        if not self.service_is_online:
            return
        print(self.client.meminfo())


if __name__ == '__main__':
    unittest.main()