Source code for opyncorporates.api

from datetime import datetime
import json
import re

import requests

BASE_URL = 'https://api.opencorporates.com'


[docs]class Request(object): """ An object for consuming the opencorporates API. A user can instantiate a Request object in any one of the following three ways: - provide the complete request url as the first and only argument - provide the route url (i.e., the portion of the url following https://api.opencorporates.com) as the first and only argument - provide the request args as positional arguments and key-value pairs as keyword arguments The last way is intended to mimic the opencorporates REST API syntax. Request args (i.e. arguments separated by '/' characters following the base url) are provided as positional arguments, and request vars (i.e., key-value pairs following the '?' character in the url and separated by '&' characters) are provided as keyword arguments. Examples ------- https://api.opencorporates.com/v0.4/companies/search?q=google --> Request('https://api.opencorporates.com/v0.4/companies/search?q=google') Request('/v0.4/companies/search?q=google') Request('v0.4', 'companies', 'search', q='Google') Notes ----- See https://api.opencorporates.com/documentation/API-Reference for additional information on available arguments and keyword arguments. Attributes ---------- _request_args: The list of args used in the request. _request_vars: The list of key-value variables used in the request. api_version: str The API version used for the request. api_token: str The API token used for the request. object_type: str The type of object associated with the request. url: str The url for the request. responses: list A list of all responses objects returned by the Request object. """ def __init__(self, *args, **kwargs): self.args = list(args) self.vars = kwargs self.api_token = kwargs.get('api_token', None) self.url = None self.responses = [] # select build method pattern = r'^http[s]{0,1}://api\.opencorporates.com' if re.match(pattern, str(self.args[0])): url = self.args.pop(0) route = url.replace('https://', '').replace('http://', '') route = route.split('/', 1)[1] self.__build_from_route(route, *self.args, **self.vars) elif len(args) == 1 and '/' in args[0]: self.__build_from_route(*args, **self.vars) else: self.api_version = self.args.pop(0) self.__build(self.api_version, *self.args, **self.vars) @property def response(self): """ Returns the most current response for the request. Returns ------- response: obj A requests.Models.Response object with a requested_at attribute. """ if len(self.responses) == 0: self.get_response() return self.responses[-1]
[docs] def get_response(self): """ Submits the request to the opencorporates API. When called, this method submits the request to the opencorporates API and appends it to the responses attribute. Returns ------- response: obj A requests.Models.Response object with a requested_at attribute. """ response = requests.get(self.url) response.requested_at = datetime.utcnow() self.responses.append(response) return response
def __build(self, api_version, *args, **kwargs): """ Build the Request object using args and kwargs provided.""" self.api_token = None self.api_version = str(api_version).replace('v', '') self.args = list(args) self.vars = kwargs or {} # get api_token if provided self.api_token = kwargs.get('api_token', None) if 'api_token' in kwargs.keys() and kwargs.get('api_token') is None: del kwargs['api_token'] # clean query term for request url if self.vars.get('q', None): self.vars['q'] = self.vars['q'].lower().replace(' ', '+') # build request url self.url = "%s/v%s/" % (BASE_URL, self.api_version) self.url += '/'.join([str(a) for a in self.args]) if self.vars is not None: self.url += '?' self.url += '&'.join(['%s=%s' % (k, v) for k, v in self.vars.items()]) def __build_from_route(self, route, *args, **kwargs): """ Parse route into args and kwargs. This method parses a provided route string into args and kwargs and then submits it to the __build method to complete instantiation of the Request object. """ if route[0] == '/': route = route[1:] request_args = route.split('/') request_vars = {} if '?' in request_args[-1]: request_args[-1], request_vars = request_args[-1].split('?') request_vars = {x[0]: x[1] for x in [x.split("=") for x in request_vars.split("&")]} request_args.extend(list(args)) for k, v in kwargs.items(): request_vars[k] = v self.__build(*request_args, **request_vars) def __str__(self): """ Pretty print the Request object.""" obj = dict() # remove hidden key-value pairs for k, v in self.__dict__.items(): if not k.startswith('_'): obj[k] = v return json.dumps(obj, indent=4, sort_keys=True)
[docs]class FetchRequest(Request): """ Get an item from the opencorporates API by unique identifier. In most cases, a unique identifying number is provided after the object type. When selecting companies, however, the company's two-character country code must be supplied, too. Notes ----- See https://api.opencorporates.com/documentation/API-Reference for additional information on available arguments and keyword arguments. Examples -------- officer = FetchRequest('v0.4', 'officers', '123456') company = FetchRequest('v0.4', 'companies', 'gb', '00102498') Parameters ---------- api_version: str The API version used for the request object_type: str The type of object to search for. Attributes ---------- object_type: str The requested object's type. results: dict The requested item. """ def __init__(self, api_version, object_type, *args, **kwargs): self.object_type = object_type self.results = None api_version = str(api_version).replace('v', '') # construct args args = list(args) args.insert(0, "v%s" % api_version) args.insert(1, self.object_type) super(FetchRequest, self).__init__(*args, **kwargs) if self.response.status_code == 200: response_text = json.loads(self.response.text) if len(response_text['results']) == 0: self.results = [] else: self.results = list(response_text['results'].values())[0]
[docs]class MatchRequest(Request): """ Submits a match request to the opencorporates API. The MatchRequest class is a subclass of the Request class. Like the Request class, the Search class is intended to mimic the opencorporates REST API syntax. Request args (i.e. arguments separated by '/' characters following the base url) are provided as positional arguments, and request vars (i.e., key-value pairs following the '?' character in the url and separated by '&' characters) are provided as keyword arguments. Notes ----- See https://api.opencorporates.com/documentation/API-Reference for additional information on available arguments and keyword arguments. Parameters ---------- api_version: str The API version used for the request object_type: str The type of object to search for. q: str (optional) The term that you are searching for Attributes ---------- object_type: str The type of object to search for. search_term: str The original term provided for the match request. q: str The match term transformed for use in the request url. results: dict The results of the match request. """ def __init__(self, cls, api_version, object_type, *args, **kwargs): q = kwargs.pop('q', None) if q is None: raise ValueError('Enter a term for your match request.') self._cls = cls self.object_type = object_type self.match_term = q self.q = q.lower().replace(' ', '+') self.results = {} api_version = str(api_version).replace('v', '') # construct args args = list(args) args.insert(0, "v%s" % api_version) args.insert(1, self.object_type) args.append('match') # construct kwargs kwargs['q'] = self.q super(MatchRequest, self).__init__(*args, **kwargs) if self.response.status_code == 200: response_text = json.loads(self.response.text) self.results = list(response_text['results'].values())[0] self.results = [r for r in self.results]
[docs]class SearchRequest(Request): """ Submits a search request to the opencorporates API. The SearchRequest class is a subclass of the Request class. Like the Request class, the Search class is intended to mimic the opencorporates REST API syntax. Request args (i.e. arguments separated by '/' characters following the base url) are provided as positional arguments, and request vars (i.e., key-value pairs following the '?' character in the url and separated by '&' characters) are provided as keyword arguments. Search results are provided in a paginated format. Each page of the search results is obtained through a separate request to the opencorporates API with page=X supplied as a request variable. Notes ----- See https://api.opencorporates.com/documentation/API-Reference for additional information on available arguments and keyword arguments. Parameters ---------- api_version: str The API version used for the request object_type: str The type of object to search for. q: str (optional) The term that you are searching for Attributes ---------- In addition to the attributes exposed by the Request class: object_type: str The type of object to search for. search_term: str The original term provided for the search q: str The search term transformed for use in the request url per_page: int The number of search results per page total_pages: int The total number of pages of search results total_count: int The total number of search results page_urls: list A list of all request urls for obtaining the search results """ def __init__(self, api_version, object_type, *args, **kwargs): q = kwargs.pop('q', None) if q is None: raise ValueError('Enter a term for your search') self.object_type = object_type self.search_term = q self.q = q.lower().replace(' ', '+') self.per_page = None self.total_pages = None self.total_count = None self.page_urls = [] api_version = str(api_version).replace('v', '') # construct args args = list(args) args.insert(0, "v%s" % api_version) args.insert(1, self.object_type) args.append('search') # construct kwargs kwargs['q'] = self.q super(SearchRequest, self).__init__(*args, **kwargs) if self.response.status_code == 200: results = json.loads(self.response.text)['results'] self.per_page = int(results['per_page']) self.total_pages = int(results['total_pages']) self.total_count = int(results['total_count']) self.page_urls = ["%s&page=%s" % (self.url, p + 1) for p in range(self.total_pages)] @property def results(self): """ Yields all search results. Yields ------ item: dict A dictionary representing a search result item. """ for p in self.page_urls: page = self.get_page(p) for item in page: yield item return
[docs] def get_page(self, page): """ Calls the opencorporates API and returns a page of results. Parameters ---------- page : int the page of search results to return. Returns ------- list A list of dict objects """ url = self.url + '&page=%s' % page response = requests.get(url) if response.status_code == 200: res = json.loads(response.text)['results'][self.object_type] items = [] for item in res: for k,v in item.items(): items.append(v) return items