# Source code for aflow.control

"""Functions for controlling high-level search functionality for the
AFLOW database.
"""
# Base URL of the AFLUX search endpoint. :class:`Query` appends the matchbook
# and directives directly after the trailing "?" when building a request URL.
server = "http://aflowlib.duke.edu/search/API/?"
"""str: API server address over HTTP.
"""

from aflow import msg


class Query(object):
    """Represents a search against the AFLUX API.

    Args:
        catalog (str): one of the catalogs supported on AFLOW: ['icsd', 'lib1',
          'lib2', 'lib3']. Also supports a `list` of catalog names.
        batch_size (int): number of data entries to return per HTTP request.
        step (int): step size over entries.

    Attributes:
        filters (list): of `str` filter arguments to pass to the matchbook
          section of the API request.
        selects (list): of :class:`aflow.keywords.Keyword` to *include* in the
          request.
        excludes (list): of :class:`aflow.keywords.Keyword` to *exclude* in the
          request.
        order (str): name of the keyword to order by. AFLUX only supports a
          single order-by parameter for now. Set it with :meth:`orderby`.
        catalog (list): catalog names to search; `None` when no catalog
          restriction was requested. A single name passed to the constructor
          is wrapped in a one-element list.
        N (int): number of results in the current search query.
        reverse (bool): when True, reverse the order of the results in the
          query.
        k (int): number of datasets per page for the current iterator. Can be
          controlled by `batch_size`.
        responses (dict): keys are the page numbers `n` that were requested
          (negative when :attr:`reverse` is set); values are the corresponding
          JSON dictionaries.
        step (int): step size over entries.
    """
    def __init__(self, catalog=None, batch_size=100, step=1):
        self.filters = []
        self.selects = []
        self.excludes = []
        self.order = None
        #A bare catalog name is wrapped in a list so that `_directives` can
        #always join the names with ':'.
        if isinstance(catalog, (list, tuple, type(None))):
            self.catalog = catalog
        else:
            self.catalog = [catalog]

        self._N = None
        """int: number of results in the current search query."""
        self.reverse = False
        self._n = 1
        self.k = batch_size
        #NOTE(review): `step` is recorded here and by slicing, but `__next__`
        #always advances by one entry; confirm whether stepping is intended.
        self.step = step
        self.responses = {}

        self._iter = 0
        """int: current integer id of the iterator in the *whole* dataset; this
        means it can have a value greater than :attr:`k`.
        """
        self._max_entry = None
        """int: index of the maximum entry that should be returned from this
        query.
        """
        self._matchbook = None
        """str: matchbook portion of the request URL.
        """
        self._final = False
        """bool: when True, this query is finalized and no additional filters,
        etc. can be added to it.
        """

    def reset_iter(self):
        """Resets the iterator back to zero so that the collection can be
        iterated over again *without* needing to request the data from the
        server again.
        """
        self._iter = 0

    @property
    def n(self):
        """Current page number for the iterator; negative when the query is
        reversed.
        """
        if self.reverse:
            return -1*self._n
        else:
            return self._n

    @property
    def N(self):
        """Number of results in the current search query. The first access
        triggers a request to the server if no count is known yet.
        """
        if self._N is None:
            self._request(self.n, self.k)
        return self._N

    @property
    def max_N(self):
        """Returns the maximum integer index that will be reached by this query.
        """
        if self._max_entry is None:
            return self.N
        #A slice may ask for more entries than the query has; never report an
        #index past the end, otherwise iteration would look up entries that
        #do not exist in the cached pages.
        if self._N is not None:
            return min(self._max_entry, self._N)
        return self._max_entry

    def __len__(self):
        return self.N

    def __getitem__(self, seq):
        """Returns a single entry (for an `int` index) or a new, sliced
        :class:`Query` (for a `slice`) that shares the cached responses of
        this one.

        Args:
            seq (int or slice): entry index, or the range of entries to
              iterate over.

        Raises:
            TypeError: if `seq` is neither an `int` nor a `slice`.
        """
        #We need to trigger the first request to make sure that the total
        #number of entries is fixed on the parent object, and so that the
        #pointers to the response caching are all the same. The length is
        #computed *outside* of the assert so that the request still happens
        #when python runs with assertions disabled (-O).
        total = len(self)
        assert total > 0

        from copy import copy
        #Perform a shallow copy so that we get a new reference for the
        #indices, while the response cache stays shared.
        result = copy(self)
        if isinstance(seq, slice):
            result._iter = seq.start if seq.start is not None else 0
            result._max_entry = seq.stop
            result.step = seq.step if seq.step is not None else 1
            return result
        elif isinstance(seq, int):
            result._iter = seq
            return next(result)
        else:
            raise TypeError("Query indices must be integers or slices, "
                            "not {}".format(type(seq).__name__))

    def _request(self, n, k):
        """Constructs the query string for this :class:`Query` object for the
        specified paging limits, fetches it from the REST API and caches the
        decoded response in :attr:`responses` under the page number `n`.

        Args:
            n (int): page number of the results to return.
            k (int): number of datasets per page.
        """
        if len(self.responses) == 0:
            #We are making the very first request, finalize the query.
            self.finalize()

        import json
        from contextlib import closing
        from six.moves import urllib

        url = "{0}{1},{2}".format(server, self.matchbook(),
                                  self._directives(n, k))
        #`closing` releases the connection on python 2 and 3 alike.
        with closing(urllib.request.urlopen(url)) as handle:
            rawresp = handle.read().decode("utf-8")

        try:
            response = json.loads(rawresp)
        except ValueError:# pragma: no cover
            #We can't easily simulate network failure...
            msg.err("{}\n\n{}".format(url, rawresp))
            return

        #If this is the first request, then save the number of results in the
        #query. The keys look like "<index> of <total>".
        if len(self.responses) == 0:
            if response:
                self._N = int(next(iter(response.keys())).split()[-1])
            else:
                #An empty payload has no "<index> of <total>" key to parse;
                #presumably it means that nothing matched the query.
                self._N = 0
        self.responses[n] = response

    def finalize(self):
        """Finalizes the current state of the query. This means that the request
        URL will be saved, but the individual keyword objects will be
        *reset*. Re-executing the search query will reconstruct the same object
        and request, but any cached responses will be lost.
        """
        #Generate the matchbook query, this has all the filters, selects and
        #ordering information.
        self.matchbook()

        #Next, reset all the keywords for the global AFLOW.
        from aflow.keywords import reset
        reset()

        #Switch out all of the keyword instances for their string
        #representations.
        self.selects = [str(s) for s in self.selects]
        self.filters = [str(f) for f in self.filters]
        self.excludes = [str(x) for x in self.excludes]
        self.order = str(self.order) if self.order is not None else None

        #Set the finalizer flag so that this object doesn't allow mutations.
        self._final = True

    def matchbook(self):
        """Constructs the matchbook portion of the query. Once the query is
        finalized the cached string is returned instead of being rebuilt.
        """
        if not self._final or self._matchbook is None:
            items = []
            #AFLUX orders by the first element in the query. If we have an
            #orderby specified, then place it first.
            if self.order is not None:
                items.append(str(self.order))
            items.extend(str(s) for s in self.selects)
            items.extend(str(f) for f in self.filters)
            items.extend("${}".format(str(x)) for x in self.excludes)
            self._matchbook = ','.join(items)
        return self._matchbook

    def _directives(self, n, k):
        """Returns the directives portion of the AFLUX query.

        Args:
            n (int): page number of the results to return.
            k (int): number of datasets per page.
        """
        items = []
        if self.catalog is not None:
            items.append("catalog({})".format(':'.join(self.catalog)))

        #Next, add the paging context. This query maintains its own paging
        #context.
        items.append("paging({0:d},{1:d})".format(n, k))
        return ','.join(items)

    def __iter__(self):
        return self

    def next(self):# pragma: no cover
        """Returns the next entry in the query; python 2 spelling of
        :meth:`__next__`.
        """
        return self.__next__()

    def __next__(self):
        """Returns the next entry of the AFLUX API request results, fetching
        the page that holds it if that page is not cached yet.

        Raises:
            StopIteration: once :attr:`max_N` entries have been consumed.
        """
        #First, find out which entry we are on.
        n = (self._iter // self.k) + 1
        i = self._iter % self.k

        #Reverse the sign now that we have figured out the ordinal page number.
        if self.reverse:
            n *= -1

        if n not in self.responses and self._iter < self.max_N:
            self._n = abs(n)
            self._request(self.n, self.k)
            assert len(self.responses) > 0

        if self._iter >= self.max_N:
            raise StopIteration()

        #Only import the entry class when there is actually one to build.
        from aflow.entries import Entry
        index = self.k*(abs(n)-1) + i + 1
        key = "{} of {}".format(index, self.N)
        raw = self.responses[n][key]
        result = Entry(**raw)
        #Increment the iterator right before we return the entry.
        self._iter += 1
        return result

    def _final_check(self):
        """Checks whether this object is finalized; if it is, print a friendly
        message and return False, otherwise True.
        """
        if self._final:
            msg.info("This query has been finalized. It cannot be mutated. "
                     "Create a new query to change the matchbook.")
        return not self._final

    def filter(self, keyword):
        """Adds a search term to the current filter list. Calling :meth:`filter`
        multiple times will join the final filters together using logical *and*.

        Args:
            keyword (aflow.keywords.Keyword): that encapsulates the AFLUX
              request language logic.

        Returns:
            Query: this object, so that calls can be chained.
        """
        if self._final_check():
            self._N = None
            self.filters.append(keyword)
        return self

    def select(self, *keywords):
        """Adds a keyword to the list of properties to return for each material
        in the request. The keyword currently used for ordering is skipped,
        since it is already placed first in the matchbook.

        Args:
            keywords (list): of :class:`aflow.keywords.Keyword` that
              encapsulates the AFLUX request language logic.

        Returns:
            Query: this object, so that calls can be chained.
        """
        if self._final_check():
            self._N = None
            for keyword in keywords:
                if keyword is not self.order:
                    self.selects.append(keyword)
        return self

    def orderby(self, keyword, reverse=False):
        """Sets a keyword to be the one by which the results are ordered. If
        the keyword was already selected it is removed from the select list,
        since the matchbook places the ordering keyword first.

        Args:
            keyword (aflow.keywords.Keyword): that encapsulates the AFLUX
              request language logic.
            reverse (bool): when True, reverse the ordering.

        Returns:
            Query: this object, so that calls can be chained.
        """
        if self._final_check():
            self._N = None
            self.order = keyword
            self.reverse = reverse
            if keyword in self.selects:
                self.selects.remove(keyword)
        return self

    def exclude(self, *keywords):
        """Sets a keyword to be *excluded* from the response.

        Args:
            keywords (list): of :class:`aflow.keywords.Keyword` that
              encapsulates the AFLUX request language logic.

        Returns:
            Query: this object, so that calls can be chained.
        """
        if self._final_check():
            self._N = None
            for keyword in keywords:
                self.excludes.append(keyword)
        return self