1# Copyright (C) 2020 Gradient Boosted Investments, Inc. - All Rights Reserved
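"""Python client for the Boosted.ai Insights API."""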
2
3import base64
4import csv
5import datetime
6import functools
7import io
8import itertools
9import json
10import logging
11import math
12import mimetypes
13import os
14import sys
15import tempfile
16import time
17from datetime import date, timedelta
18from typing import Any, Dict, List, Literal, Optional, Tuple, Union
19from urllib import parse
20
21import numpy as np
22import pandas
23import pandas as pd
24import requests
25from dateutil import parser
26
27import boosted.api.graphql_queries as graphql_queries
28from boosted.api.api_type import (
29 BoostedAPIException,
30 BoostedDate,
31 ChunkStatus,
32 ColumnSubRole,
33 DataAddType,
34 DataSetConfig,
35 DataSetType,
36 DateIdentCountryCurrency,
37 GbiIdSecurity,
38 GbiIdTickerISIN,
39 HedgeExperiment,
40 HedgeExperimentDetails,
41 HedgeExperimentScenario,
42 Language,
43 NewsHorizon,
44 PortfolioSettings,
45 Status,
46 ThemeUniverse,
47 hedge_experiment_type,
48)
49from boosted.api.api_util import (
50 get_valid_iso_dates,
51 convert_date,
52 get_date_range,
53 infer_dataset_schema,
54 protoCubeJsonDataToDataFrame,
55 to_camel_case,
56 validate_start_and_end_dates,
57)
58
59logger = logging.getLogger("boosted.api.client")
60logging.basicConfig()
61
62g_boosted_api_url = "https://insights.boosted.ai"
63g_boosted_api_url_dev = "https://insights-dev.boosted.ai"
64WATCHLIST_ROUTE_PREFIX = "/api/dal/watchlist"
65ROUTE_PREFIX = WATCHLIST_ROUTE_PREFIX
66DAL_WATCHLIST_ROUTE = "/api/v0/watchlist"
67DAL_SECURITIES_ROUTE = "/api/v0/securities"
68DAL_PA_ROUTE = "/api/v0/portfolio-analysis"
69PORTFOLIO_GROUP_ROUTE = "/api/v0/portfolio-group"
70
71RISK_FACTOR = "risk-factor"
72RISK_FACTOR_V2 = "risk-factor-v2"
73RISK_FACTOR_COLUMNS = [
74 "depth",
75 "identifier",
76 "stock_count",
77 "volatility",
78 "exposure",
79 "rating",
80 "rating_delta",
81]
82
83
84class BoostedClient:
85 def __init__(
86 self, api_key, override_uri=None, debug=False, proxy=None, disable_verify_ssl=False
87 ):
88 """
89 Parameters
90 ----------
        api_key: str
            Your API key provided by the Boosted application. See your profile
            to generate a new key.
        override_uri: str, optional
            Override the default Boosted API base URI (intended for internal
            or development use).
        debug: bool, optional
            If True, enable debug-level logging for this client.
        proxy: str, optional
            Your organization may require the use of a proxy for access.
            The address of an HTTPS proxy in the format of <address>:<port>.
            Examples are "123.456.789:123" or "my.proxy.com:123".
            Do not prepend with "https://".
        disable_verify_ssl: bool, optional
            Your networking setup may be behind a firewall which performs SSL
            inspection. Either set the REQUESTS_CA_BUNDLE environment variable
            to point to the location of a custom certificate bundle, or set this
            parameter to True to disable SSL verification as a workaround.
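
        Example
        -------
        A minimal, illustrative setup (the key and dataset ID below are
        placeholders)::

            client = BoostedClient(api_key="YOUR_API_KEY")
            dataset_info = client.query_dataset("YOUR_DATASET_ID")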
104 """
105 if override_uri is None:
106 self.base_uri = g_boosted_api_url
107 else:
108 self.base_uri = override_uri
109 self.api_key = api_key
110 self.debug = debug
111 self._request_params: Dict = {}
112 if debug:
113 logger.setLevel(logging.DEBUG)
114 else:
115 logger.setLevel(logging.INFO)
116 if proxy is not None:
117 self._request_params["proxies"] = {"https": proxy}
118 if disable_verify_ssl:
119 self._request_params["verify"] = False
120
    def __print_json_info(self, json_data, isInference=False):
        if "warnings" in json_data.keys():
            for warning in json_data["warnings"]:
                logger.warning(" {0}".format(warning))
        if "errors" in json_data.keys():
            for error in json_data["errors"]:
                logger.error(" {0}".format(error))
            # Report failure after logging every error, not just the first.
            return Status.FAIL
129
130 if "result" in json_data.keys():
131 results_data = json_data["result"]
132 if isInference:
133 if "inferenceResultsUrl" in results_data.keys():
134 res_url = parse.urlparse(results_data["inferenceResultsUrl"])
135 logger.debug(res_url)
136 logger.info("Inference started.")
137 if "updateCount" in results_data.keys():
138 logger.info("Updated {0} rows.".format(results_data["updateCount"]))
139 if "createCount" in results_data.keys():
140 logger.info("Created {0} rows.".format(results_data["createCount"]))
141 return Status.SUCCESS
142
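    # Note: date arguments accepted by this client may be a datetime.datetime,
    # a datetime.date, or a parseable date string such as "2020-01-31";
    # __to_date_obj/__iso_format below normalize them to a date / ISO string.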
143 def __to_date_obj(self, dt):
144 if isinstance(dt, datetime.datetime):
145 dt = dt.date()
146 elif isinstance(dt, datetime.date):
147 return dt
148 elif isinstance(dt, str):
149 try:
150 dt = parser.parse(dt).date()
151 except ValueError:
152 raise ValueError('dt: "' + dt + '" is not a valid date.')
153 return dt
154
155 def __iso_format(self, dt):
156 date = self.__to_date_obj(dt)
157 if date is not None:
158 date = date.isoformat()
159 return date
160
161 def _check_status_code(self, response, isInference=False):
162 has_json = False
163 try:
164 logger.debug(response.headers)
165 if "Content-Type" in response.headers:
166 if response.headers["Content-Type"].startswith("application/json"):
167 json_data = response.json()
168 has_json = True
169 else:
170 has_json = False
171 except json.JSONDecodeError:
172 logger.error("ERROR: response has no JSON payload.")
        if response.status_code == 200 or response.status_code == 202:
            if has_json:
                self.__print_json_info(json_data, isInference)
            return Status.SUCCESS
179 if response.status_code == 404:
180 if has_json:
181 self.__print_json_info(json_data, isInference)
182 raise BoostedAPIException(
183 'Server "{0}" not reachable. Code {1}.'.format(
184 self.base_uri, response.status_code
185 ),
186 data=response,
187 )
188 if response.status_code == 400:
189 if has_json:
190 self.__print_json_info(json_data, isInference)
191 if isInference:
192 return Status.FAIL
193 else:
194 raise BoostedAPIException("Error, bad request. Check the dataset ID.", response)
195 if response.status_code == 401:
196 if has_json:
197 self.__print_json_info(json_data, isInference)
198 raise BoostedAPIException("Authorization error.", response)
199 else:
200 if has_json:
201 self.__print_json_info(json_data, isInference)
202 raise BoostedAPIException(
203 "Error in API response. Status code={0} {1}\n{2}".format(
204 response.status_code, response.reason, response.headers
205 ),
206 response,
207 )
208
209 def _try_extract_error_code(self, result):
210 logger.info(result.headers)
211 if "Content-Type" in result.headers:
212 if result.headers["Content-Type"].startswith("application/json"):
213 if "errors" in result.json():
214 return result.json()["errors"]
215 if result.headers["Content-Type"].startswith("text/plain"):
216 return result.text
217 return str(result.reason)
218
219 def _check_ok_or_err_with_msg(self, res, potential_error_msg: str):
220 if not res.ok:
221 error = self._try_extract_error_code(res)
222 logger.error(error)
223 raise BoostedAPIException(f"{potential_error_msg}: {error}")
224
225 def _get_portfolio_rebalance_from_periods(
226 self, portfolio_id: str, rel_periods: List[str]
227 ) -> List[datetime.date]:
228 """
229 Returns a list of rebalance dates for a portfolio given a list of
230 relative periods of format '1D', '1W', '3M', etc.
231 """
232 resp = self._get_graphql(
233 query=graphql_queries.GET_PORTFOLIO_RELATIVE_DATES_QUERY,
234 variables={"portfolioId": portfolio_id, "relativePeriods": rel_periods},
235 )
236 dates = resp["data"]["portfolio"]["relativeDates"]
237 return [datetime.datetime.strptime(d["date"], "%Y-%m-%d").date() for d in dates]
238
239 def translate_text(self, language: Optional[Union[Language, str]], text: str) -> str:
240 if not language or language == Language.ENGLISH:
241 # By default, do not translate English
242 return text
243
244 params = {"text": text, "langCode": language}
245 url = self.base_uri + "/api/translate/translate-text"
246 headers = {"Authorization": "ApiKey " + self.api_key}
247 logger.info("Translating text...")
248 res = requests.post(url, json=params, headers=headers, **self._request_params)
249 try:
250 result = res.json()["translatedText"]
251 except Exception:
252 raise BoostedAPIException("Error translating text")
253 return result
254
255 def query_dataset(self, dataset_id):
256 url = self.base_uri + "/api/datasets/{0}".format(dataset_id)
257 headers = {"Authorization": "ApiKey " + self.api_key}
258 res = requests.get(url, headers=headers, **self._request_params)
259 if res.ok:
260 return res.json()
261 else:
262 error_msg = self._try_extract_error_code(res)
263 logger.error(error_msg)
264 raise BoostedAPIException("Failed to query dataset: {0}.".format(error_msg))
265
266 def query_namespace_dataset_id(self, namespace, data_type):
267 url = self.base_uri + f"/api/custom-security-dataset/{namespace}/{data_type}"
268 headers = {"Authorization": "ApiKey " + self.api_key}
269 res = requests.get(url, headers=headers, **self._request_params)
270 if res.ok:
271 return res.json()["result"]["id"]
272 else:
273 if res.status_code != 404:
274 error_msg = self._try_extract_error_code(res)
275 logger.error(error_msg)
276 raise BoostedAPIException("Failed to query dataset: {0}.".format(error_msg))
277 else:
278 return None
279
280 def export_global_data(
281 self,
282 dataset_id,
283 start=(datetime.date.today() - timedelta(days=365 * 25)),
284 end=datetime.date.today(),
285 timeout=600,
286 ):
287 query_info = self.query_dataset(dataset_id)
288 if DataSetType[query_info["type"]] != DataSetType.GLOBAL:
289 raise BoostedAPIException(
290 f"Incorrect dataset type: {query_info['type']}" f" - Expected {DataSetType.GLOBAL}"
291 )
292 return self.export_data(dataset_id, start, end, timeout)
293
294 def export_independent_data(
295 self,
296 dataset_id,
297 start=(datetime.date.today() - timedelta(days=365 * 25)),
298 end=datetime.date.today(),
299 timeout=600,
300 ):
301 query_info = self.query_dataset(dataset_id)
302 if DataSetType[query_info["type"]] != DataSetType.STRATEGY:
303 raise BoostedAPIException(
304 f"Incorrect dataset type: {query_info['type']}"
305 f" - Expected {DataSetType.STRATEGY}"
306 )
307 return self.export_data(dataset_id, start, end, timeout)
308
309 def export_dependent_data(
310 self,
311 dataset_id,
312 start=None,
313 end=None,
314 timeout=600,
315 ):
316 query_info = self.query_dataset(dataset_id)
317 if DataSetType[query_info["type"]] != DataSetType.STOCK:
318 raise BoostedAPIException(
319 f"Incorrect dataset type: {query_info['type']}" f" - Expected {DataSetType.STOCK}"
320 )
321
322 valid_date_range = self.getDatasetDates(dataset_id)
323 validStart = valid_date_range["validFrom"]
324 validEnd = valid_date_range["validTo"]
325
326 if start is None:
327 logger.info("Since no start date provided, starting from {0}.".format(validStart))
328 start = validStart
329 if end is None:
330 logger.info("Since no end date provided, ending at {0}.".format(validEnd))
331 end = validEnd
332 start = self.__to_date_obj(start)
333 end = self.__to_date_obj(end)
334 if start < validStart:
335 logger.info("Data does not exist before {0}.".format(validStart))
336 logger.info("Starting from {0}.".format(validStart))
337 start = validStart
338 if end > validEnd:
339 logger.info("Data does not exist after {0}.".format(validEnd))
340 logger.info("Ending at {0}.".format(validEnd))
341 end = validEnd
342 validate_start_and_end_dates(start, end)
343
344 logger.info("Data exists from {0} to {1}.".format(start, end))
345 request_url = "/api/datasets/" + dataset_id + "/export-data"
346 headers = {"Authorization": "ApiKey " + self.api_key}
347
348 data_chunks = []
349 chunk_size_days = 90
350 while start <= end:
351 chunk_end = start + timedelta(days=chunk_size_days)
352 if chunk_end > end:
353 chunk_end = end
354
355 logger.info("Requesting start={0} end={1}.".format(start, chunk_end))
356 params = {"start": self.__iso_format(start), "end": self.__iso_format(chunk_end)}
357 logger.debug("URL={0}, headers={1}, params={2}".format(request_url, headers, params))
358
359 res = requests.get(
360 self.base_uri + request_url,
361 headers=headers,
362 params=params,
363 timeout=timeout,
364 **self._request_params,
365 )
366
367 if res.ok:
368 buf = io.StringIO(res.text)
369 df = pd.read_csv(buf, index_col=0, parse_dates=True)
370 if "price" in df.columns:
371 df = df.drop("price", axis=1)
372 data_chunks.append(df)
373 else:
374 error_msg = self._try_extract_error_code(res)
375 logger.error(error_msg)
376 raise BoostedAPIException("Failed to query dataset: {0}.".format(error_msg))
377
378 start = start + timedelta(days=chunk_size_days + 1)
379
380 return pd.concat(data_chunks)
381
382 def export_custom_security_data(
383 self,
384 dataset_id,
385 start=(date.today() - timedelta(days=365 * 25)),
386 end=date.today(),
387 timeout=600,
388 ):
389 query_info = self.query_dataset(dataset_id)
390 if DataSetType[query_info["type"]] != DataSetType.SECURITIES_DAILY:
391 raise BoostedAPIException(
392 f"Incorrect dataset type: {query_info['type']}"
393 f" - Expected {DataSetType.SECURITIES_DAILY}"
394 )
395 return self.export_data(dataset_id, start, end, timeout)
396
397 def export_data(
398 self,
399 dataset_id,
400 start=(datetime.date.today() - timedelta(days=365 * 25)),
401 end=datetime.date.today(),
402 timeout=600,
403 ):
404 logger.info("Requesting start={0} end={1}.".format(start, end))
405 request_url = "/api/datasets/" + dataset_id + "/export-data"
406 headers = {"Authorization": "ApiKey " + self.api_key}
407 start = self.__iso_format(start)
408 end = self.__iso_format(end)
409 params = {"start": start, "end": end}
410 logger.debug("URL={0}, headers={1}, params={2}".format(request_url, headers, params))
411 res = requests.get(
412 self.base_uri + request_url,
413 headers=headers,
414 params=params,
415 timeout=timeout,
416 **self._request_params,
417 )
418 if res.ok or self._check_status_code(res):
419 buf = io.StringIO(res.text)
420 df = pd.read_csv(buf, index_col=0, parse_dates=True)
421 if "price" in df.columns:
422 df = df.drop("price", axis=1)
423 return df
424 else:
425 error_msg = self._try_extract_error_code(res)
426 logger.error(error_msg)
427 raise BoostedAPIException("Failed to query dataset: {0}.".format(error_msg))
428
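    # Illustrative use of export_data above (the dataset ID and dates are
    # placeholders). The result is a pandas DataFrame indexed by date, with any
    # "price" column dropped:
    #
    #   df = client.export_data("YOUR_DATASET_ID", start="2020-01-01", end="2020-12-31")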
429 def _get_inference(self, model_id, inference_date=datetime.date.today()):
430 request_url = "/api/models/" + model_id + "/inference-results"
431 headers = {"Authorization": "ApiKey " + self.api_key}
432 params = {}
433 params["date"] = self.__iso_format(inference_date)
434 logger.debug(request_url + ", " + str(headers) + ", " + str(params))
435 res = requests.get(
436 self.base_uri + request_url, headers=headers, params=params, **self._request_params
437 )
438 status = self._check_status_code(res, isInference=True)
439 if status == Status.SUCCESS:
440 return res, status
441 else:
442 return None, status
443
444 def get_inference(
445 self, model_id, inference_date=datetime.date.today(), block=False, timeout_minutes=30
446 ):
447 start_time = datetime.datetime.now()
448 while True:
            for numRetries in range(3):
                res, status = self._get_inference(model_id, inference_date)
                if res is not None:
                    # Got a response; stop retrying.
                    break
                if status == Status.FAIL:
                    return Status.FAIL
                logger.info("Retrying...")
457 if res is None:
458 logger.error("Max retries reached. Request failed.")
459 return None
460
461 json_data = res.json()
462 if "result" in json_data.keys():
463 if json_data["result"]["status"] == "RUNNING":
464 still_running = True
465 if not block:
                        logger.warning("Inference job is still running.")
467 return None
468 else:
469 logger.info(
470 "Inference job is still running. Time elapsed={0}.".format(
471 datetime.datetime.now() - start_time
472 )
473 )
474 time.sleep(10)
475 else:
476 still_running = False
477
                if not still_running and json_data["result"]["status"] == "COMPLETE":
                    # Avoid shadowing the imported csv module.
                    signals_csv = json_data["result"]["signals"]
                    logger.info(json_data["result"])
                    if self._check_status_code(res, isInference=True):
                        logger.info(
                            "Total run time = {0}.".format(datetime.datetime.now() - start_time)
                        )
                        return signals_csv
486 else:
487 if "errors" in json_data.keys():
488 logger.error(json_data["errors"])
489 else:
490 logger.error("Error getting inference for date {0}.".format(inference_date))
491 return None
492 if (datetime.datetime.now() - start_time).total_seconds() / 60.0 > timeout_minutes:
493 logger.error("Timeout waiting for job completion.")
494 return None
495
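    # Illustrative blocking inference fetch using get_inference above (the
    # model ID is a placeholder). With block=True the call polls every 10
    # seconds until the job completes or timeout_minutes elapses, then returns
    # the signals CSV payload:
    #
    #   signals_csv = client.get_inference("YOUR_MODEL_ID", block=True)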
496 def createDataset(self, schema):
497 request_url = "/api/datasets"
498 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
499 s = json.dumps(schema)
500 logger.info("Creating dataset with schema " + s)
501 res = requests.post(
502 self.base_uri + request_url, data=s, headers=headers, **self._request_params
503 )
504 if res.ok:
505 return res.json()["result"]
506 else:
507 raise BoostedAPIException("Dataset creation failed.")
508
509 def create_custom_namespace_dataset(self, namespace, schema):
510 request_url = f"/api/custom-security-dataset/{namespace}"
511 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
512 s = json.dumps(schema)
513 logger.info("Creating dataset with schema " + s)
514 res = requests.post(
515 self.base_uri + request_url, data=s, headers=headers, **self._request_params
516 )
517 if res.ok:
518 return res.json()["result"]
519 else:
520 raise BoostedAPIException("Dataset creation failed.")
521
522 def getUniverse(self, modelId, date=None):
523 if date is not None:
524 url = "/api/models/{0}/universe/{1}".format(modelId, self.__iso_format(date))
525 logger.info("Getting universe for date: {0}.".format(date))
526 else:
527 url = "/api/models/{0}/universe/".format(modelId)
528 headers = {"Authorization": "ApiKey " + self.api_key}
529 res = requests.get(self.base_uri + url, headers=headers, **self._request_params)
530 if res.ok:
531 buf = io.StringIO(res.text)
532 df = pd.read_csv(buf, index_col=0, parse_dates=True)
533 return df
534 else:
535 error = self._try_extract_error_code(res)
536 logger.error(
537 "There was a problem getting this universe or model ID: {0}.".format(error)
538 )
539 raise BoostedAPIException("Failed to get universe: {0}".format(error))
540
541 def add_custom_security_namespace_members(
542 self, namespace, members: Union[pandas.DataFrame, str]
543 ) -> Tuple[pandas.DataFrame, str]:
544 url = self.base_uri + "/api/synthetic-datasets/{0}/generate".format(namespace)
545 headers = {"Authorization": "ApiKey " + self.api_key}
546 headers["Content-Type"] = "application/json"
547 logger.info("Adding custom security namespace for namespace: {0}".format(namespace))
548 strbuf = None
549 if isinstance(members, pandas.DataFrame):
550 df = members
551 df_canon = df.rename(columns={_: to_camel_case(_) for _ in df.columns})
552 canon_cols = ["Currency", "Symbol", "Country", "Name"]
553 if set(canon_cols).difference(df_canon.columns):
554 raise BoostedAPIException(f"Expected columns: {canon_cols}")
555 df_canon = df_canon.loc[:, canon_cols]
556 buf = io.StringIO()
557 df_canon.to_json(buf, orient="records")
558 strbuf = buf.getvalue()
559 elif isinstance(members, str):
560 strbuf = members
561 else:
562 raise BoostedAPIException(f"Unsupported members argument type: {type(members)}")
563 res = requests.post(url, data=strbuf, headers=headers, **self._request_params)
564 if res.ok:
565 res_obj = res.json()
566 res_df = pandas.Series(res_obj["generatedISIN"]).to_frame()
567 res_df.index.name = "Symbol"
568 res_df.columns = ["ISIN"]
569 logger.info("Add to custom security namespace successful.")
570 if "warnings" in res_obj:
571 logger.info("Warnings: {0}.".format(res.json()["warnings"]))
572 return res_df, res.json()["warnings"]
573 else:
574 return res_df, "No warnings."
575 else:
576 error_msg = self._try_extract_error_code(res)
            raise BoostedAPIException(
                "Failed to add custom security namespace members: {0}.".format(error_msg)
            )
578
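    # Illustrative input for add_custom_security_namespace_members above (all
    # values are placeholders). The DataFrame must supply Currency, Symbol,
    # Country and Name columns; the exact accepted input casings depend on
    # to_camel_case, so the canonical names are shown here:
    #
    #   members = pd.DataFrame(
    #       [{"Symbol": "ABC", "Currency": "USD", "Country": "USA", "Name": "ABC Corp"}]
    #   )
    #   isin_df, warnings = client.add_custom_security_namespace_members("my_namespace", members)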
579 def updateUniverse(self, modelId, universe_df, date=datetime.date.today() + timedelta(1)):
580 date = self.__iso_format(date)
581 url = self.base_uri + "/api/models/{0}/universe/{1}".format(modelId, date)
582 headers = {"Authorization": "ApiKey " + self.api_key}
583 logger.info("Updating universe for date {0}.".format(date))
584 if isinstance(universe_df, pd.core.frame.DataFrame):
585 buf = io.StringIO()
586 universe_df.to_csv(buf)
587 target = ("uploaded_universe.csv", buf.getvalue(), "text/csv")
588 files_req = {}
589 files_req["universe"] = target
590 res = requests.post(url, files=files_req, headers=headers, **self._request_params)
591 elif isinstance(universe_df, str):
592 target = ("uploaded_universe.csv", universe_df, "text/csv")
593 files_req = {}
594 files_req["universe"] = target
595 res = requests.post(url, files=files_req, headers=headers, **self._request_params)
596 else:
597 raise BoostedAPIException("Expected CSV as str or Pandas DataFrame.")
598 if res.ok:
599 logger.info("Universe update successful.")
600 if "warnings" in res.json():
601 logger.info("Warnings: {0}.".format(res.json()["warnings"]))
602 return res.json()["warnings"]
603 else:
604 return "No warnings."
605 else:
606 error_msg = self._try_extract_error_code(res)
            raise BoostedAPIException("Failed to update universe: {0}.".format(error_msg))
608
609 def create_universe(
610 self, universe: Union[pd.DataFrame, str], name: str, description: str
611 ) -> List[str]:
612 PRESENT = "PRESENT"
613 ANY = "ANY"
614 EARLIST_DATE = "1900-01-01"
615 LATEST_DATE = "4000-01-01"
616
617 if isinstance(universe, (str, bytes, os.PathLike)):
618 universe = pd.read_csv(universe)
619
620 universe.columns = universe.columns.str.lower()
621
622 # Clients are free to leave out data. Fill in some defaults here.
623 if "from" not in universe.columns:
624 universe["from"] = EARLIST_DATE
625 if "to" not in universe.columns:
626 universe["to"] = LATEST_DATE
627 if "currency" not in universe.columns:
628 universe["currency"] = ANY
629 if "country" not in universe.columns:
630 universe["country"] = ANY
631 if "isin" not in universe.columns:
632 universe["isin"] = None
633 if "symbol" not in universe.columns:
634 universe["symbol"] = None
635
636 # to prevent conflicts with python keywords
637 universe.rename(columns={"from": "from_date", "to": "to_date"}, inplace=True)
638
639 universe = universe.replace({np.nan: None})
640 security_country_currency_date_list = []
641 for i, r in enumerate(universe.itertuples()):
642 id_type = ColumnSubRole.ISIN
643 identifier = r.isin
644
            if identifier is None:
                id_type = ColumnSubRole.SYMBOL
                identifier = str(r.symbol) if r.symbol is not None else None

            # if identifier is still None, the row has neither an ISIN nor a
            # Symbol, so we cannot map it to a security
            if identifier is None:
                raise BoostedAPIException(
                    f"Missing identifier in universe row {i + 1}:"
                    " row must contain an ISIN or Symbol"
                )
658
659 security_country_currency_date_list.append(
660 DateIdentCountryCurrency(
661 date=r.from_date or EARLIST_DATE,
662 identifier=identifier,
663 country=r.country or ANY,
664 currency=r.currency or ANY,
665 id_type=id_type,
666 )
667 )
668
669 gbi_id_objs = self.getGbiIdFromIdentCountryCurrencyDate(security_country_currency_date_list)
670
671 security_list = []
672 for i, r in enumerate(universe.itertuples()):
673 # if we have a None here, we failed to map to a gbi id
674 if gbi_id_objs[i] is None:
675 raise BoostedAPIException(f"Unable to map row: {tuple(r)}")
676
677 security_list.append(
678 {
679 "stockId": gbi_id_objs[i].gbi_id,
680 "fromZ": r.from_date or EARLIST_DATE,
681 "toZ": LATEST_DATE if r.to_date in (PRESENT, None) else r.to_date,
682 "removal": False,
683 "source": "UPLOAD",
684 }
685 )
686
687 url = self.base_uri + "/api/template-universe/save"
688 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
689 req = {"name": name, "description": description, "modificationDaos": security_list}
690
691 res = requests.post(url, json=req, headers=headers, **self._request_params)
692 self._check_ok_or_err_with_msg(res, "Failed to create universe")
693
694 if "warnings" in res.json():
695 logger.info("Warnings: {0}.".format(res.json()["warnings"]))
696 return res.json()["warnings"].splitlines()
697 else:
698 return []
699
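    # Illustrative universe upload using create_universe above (values are
    # placeholders). Only an identifier column (isin or symbol) is required;
    # from/to, country and currency default to an open-ended date range and
    # ANY when omitted, and a "to" value of PRESENT is treated as open-ended:
    #
    #   universe = pd.DataFrame(
    #       [{"isin": "US0000000000", "from": "2020-01-01", "to": "PRESENT"}]
    #   )
    #   warnings = client.create_universe(universe, "My Universe", "Uploaded via API")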
    def validate_dataframe(self, df):
        if not isinstance(df, pd.core.frame.DataFrame):
            logger.error("Dataset must be of type DataFrame.")
            return False
        if not isinstance(df.index, pd.core.indexes.datetimes.DatetimeIndex):
            logger.error("Index must be DatetimeIndex.")
            return False
        if len(df.columns) == 0:
            logger.error("No feature columns exist.")
            return False
        if len(df) == 0:
            logger.error("No rows exist.")
            return False
        return True
713
714 def get_dataset_schema(self, dataset_id):
715 url = self.base_uri + "/api/datasets/{0}/schema".format(dataset_id)
716 headers = {"Authorization": "ApiKey " + self.api_key}
717 res = requests.get(url, headers=headers, **self._request_params)
718 if res.ok:
719 json_schema = res.json()
720 else:
721 error_msg = self._try_extract_error_code(res)
722 logger.error(error_msg)
723 raise BoostedAPIException("Failed to query dataset: {0}.".format(error_msg))
724 return DataSetConfig.fromDict(json_schema["result"])
725
726 def add_custom_security_daily_dataset(
727 self, namespace, dataset, schema=None, timeout=600, block=True
728 ):
729 result = self.add_custom_security_daily_dataset_with_warnings(
730 namespace, dataset, schema, timeout, block
731 )
732 return result["dataset_id"]
733
734 def add_custom_security_daily_dataset_with_warnings(
735 self,
736 namespace,
737 dataset,
738 schema=None,
739 timeout=600,
740 block=True,
741 no_exception_on_chunk_error=False,
742 ):
743 dataset_type = DataSetType.SECURITIES_DAILY
744 dsid = self.query_namespace_dataset_id(namespace, dataset_type)
745
746 if not self.validate_dataframe(dataset):
747 logger.error("dataset failed validation.")
748 return None
749
750 if dsid is None:
751 # create the dataset if not exist.
752 schema = infer_dataset_schema(
753 "custom_security_daily", dataset, dataset_type, infer_from_column_names=True
754 )
755 dsid = self.create_custom_namespace_dataset(namespace, schema.toDict())
756 data_type = DataAddType.CREATION
        elif schema is not None:
            raise ValueError(
                f"Dataset schema already exists for namespace={namespace}, type={dataset_type}"
                ", cannot create another!"
            )
762 else:
763 data_type = DataAddType.HISTORICAL
764
        logger.info("Using dataset with ID = {0}, uploading...".format(dsid))
766 result = self.add_custom_security_daily_data(
767 dsid,
768 dataset,
769 timeout,
770 block,
771 data_type=data_type,
772 no_exception_on_chunk_error=no_exception_on_chunk_error,
773 )
774 return {
775 "namespace": namespace,
776 "dataset_id": dsid,
777 "warnings": result["warnings"],
778 "errors": result["errors"],
779 }
780
781 def add_custom_security_daily_data(
782 self,
783 dataset_id,
784 csv_data,
785 timeout=600,
786 block=True,
787 data_type=DataAddType.HISTORICAL,
788 no_exception_on_chunk_error=False,
789 ):
790 warnings = []
791 query_info = self.query_dataset(dataset_id)
792 if DataSetType[query_info["type"]] != DataSetType.SECURITIES_DAILY:
793 raise BoostedAPIException(
794 f"Incorrect dataset type: {query_info['type']}"
795 f" - Expected {DataSetType.SECURITIES_DAILY}"
796 )
797 warnings, errors = self.setup_chunk_and_upload_data(
798 dataset_id, csv_data, data_type, timeout, block, no_exception_on_chunk_error
799 )
800 if len(warnings) > 0:
801 logger.warning(
802 "Encountered {0} total warnings while uploading dataset.".format(len(warnings))
803 )
804 if len(errors) > 0:
805 raise BoostedAPIException(
806 "Encountered {0} total ERRORS while uploading dataset".format(len(errors))
807 + "\n".join(errors)
808 )
809 return {"warnings": warnings, "errors": errors}
810
811 def add_dependent_dataset(
812 self, dataset, datasetName="DependentDataset", schema=None, timeout=600, block=True
813 ):
814 result = self.add_dependent_dataset_with_warnings(
815 dataset, datasetName, schema, timeout, block
816 )
817 return result["dataset_id"]
818
819 def add_dependent_dataset_with_warnings(
820 self,
821 dataset,
822 datasetName="DependentDataset",
823 schema=None,
824 timeout=600,
825 block=True,
826 no_exception_on_chunk_error=False,
827 ):
828 if not self.validate_dataframe(dataset):
829 logger.error("dataset failed validation.")
830 return None
831 if schema is None:
832 schema = infer_dataset_schema(datasetName, dataset, DataSetType.STOCK)
833 dsid = self.createDataset(schema.toDict())
834 logger.info("Creating dataset with ID = {0}.".format(dsid))
835 result = self.add_dependent_data(
836 dsid,
837 dataset,
838 timeout,
839 block,
840 data_type=DataAddType.CREATION,
841 no_exception_on_chunk_error=no_exception_on_chunk_error,
842 )
843 return {"dataset_id": dsid, "warnings": result["warnings"], "errors": result["errors"]}
844
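    # Illustrative dependent (stock-level) dataset upload using
    # add_dependent_dataset above (the DataFrame is a placeholder). The input
    # must be indexed by a DatetimeIndex (see validate_dataframe); when no
    # schema is passed, one is inferred via infer_dataset_schema:
    #
    #   dataset_id = client.add_dependent_dataset(my_features_df, datasetName="MyDataset")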
845 def add_independent_dataset(
846 self, dataset, datasetName="IndependentDataset", schema=None, timeout=600, block=True
847 ):
848 result = self.add_independent_dataset_with_warnings(
849 dataset, datasetName, schema, timeout, block
850 )
851 return result["dataset_id"]
852
853 def add_independent_dataset_with_warnings(
854 self,
855 dataset,
856 datasetName="IndependentDataset",
857 schema=None,
858 timeout=600,
859 block=True,
860 no_exception_on_chunk_error=False,
861 ):
862 if not self.validate_dataframe(dataset):
863 logger.error("dataset failed validation.")
864 return None
865 if schema is None:
866 schema = infer_dataset_schema(datasetName, dataset, DataSetType.STRATEGY)
867 schemaDict = schema.toDict()
868 if "configurationDataJson" not in schemaDict:
869 schemaDict["configurationDataJson"] = "{}"
870 dsid = self.createDataset(schemaDict)
871 logger.info("Creating dataset with ID = {0}.".format(dsid))
872 result = self.add_independent_data(
873 dsid,
874 dataset,
875 timeout,
876 block,
877 data_type=DataAddType.CREATION,
878 no_exception_on_chunk_error=no_exception_on_chunk_error,
879 )
880 return {"dataset_id": dsid, "warnings": result["warnings"], "errors": result["errors"]}
881
882 def add_global_dataset(
883 self, dataset, datasetName="GlobalDataset", schema=None, timeout=600, block=True
884 ):
885 result = self.add_global_dataset_with_warnings(dataset, datasetName, schema, timeout, block)
886 return result["dataset_id"]
887
888 def add_global_dataset_with_warnings(
889 self,
890 dataset,
891 datasetName="GlobalDataset",
892 schema=None,
893 timeout=600,
894 block=True,
895 no_exception_on_chunk_error=False,
896 ):
897 if not self.validate_dataframe(dataset):
898 logger.error("dataset failed validation.")
899 return None
900 if schema is None:
901 schema = infer_dataset_schema(datasetName, dataset, DataSetType.GLOBAL)
902 dsid = self.createDataset(schema.toDict())
903 logger.info("Creating dataset with ID = {0}.".format(dsid))
904 result = self.add_global_data(
905 dsid,
906 dataset,
907 timeout,
908 block,
909 data_type=DataAddType.CREATION,
910 no_exception_on_chunk_error=no_exception_on_chunk_error,
911 )
912 return {"dataset_id": dsid, "warnings": result["warnings"], "errors": result["errors"]}
913
914 def add_independent_data(
915 self,
916 dataset_id,
917 csv_data,
918 timeout=600,
919 block=True,
920 data_type=DataAddType.HISTORICAL,
921 no_exception_on_chunk_error=False,
922 ):
923 query_info = self.query_dataset(dataset_id)
924 if DataSetType[query_info["type"]] != DataSetType.STRATEGY:
925 raise BoostedAPIException(
926 f"Incorrect dataset type: {query_info['type']}"
927 f" - Expected {DataSetType.STRATEGY}"
928 )
929 warnings, errors = self.setup_chunk_and_upload_data(
930 dataset_id, csv_data, data_type, timeout, block, no_exception_on_chunk_error
931 )
932 if len(warnings) > 0:
933 logger.warning(
934 "Encountered {0} total warnings while uploading dataset.".format(len(warnings))
935 )
936 if len(errors) > 0:
937 raise BoostedAPIException(
938 "Encountered {0} total ERRORS while uploading dataset".format(len(errors))
939 + "\n".join(errors)
940 )
941 return {"warnings": warnings, "errors": errors}
942
943 def add_dependent_data(
944 self,
945 dataset_id,
946 csv_data,
947 timeout=600,
948 block=True,
949 data_type=DataAddType.HISTORICAL,
950 no_exception_on_chunk_error=False,
951 ):
952 warnings = []
953 query_info = self.query_dataset(dataset_id)
954 if DataSetType[query_info["type"]] != DataSetType.STOCK:
955 raise BoostedAPIException(
956 f"Incorrect dataset type: {query_info['type']}" f" - Expected {DataSetType.STOCK}"
957 )
958 warnings, errors = self.setup_chunk_and_upload_data(
959 dataset_id, csv_data, data_type, timeout, block, no_exception_on_chunk_error
960 )
961 if len(warnings) > 0:
962 logger.warning(
963 "Encountered {0} total warnings while uploading dataset.".format(len(warnings))
964 )
965 if len(errors) > 0:
966 raise BoostedAPIException(
967 "Encountered {0} total ERRORS while uploading dataset".format(len(errors))
968 + "\n".join(errors)
969 )
970 return {"warnings": warnings, "errors": errors}
971
972 def add_global_data(
973 self,
974 dataset_id,
975 csv_data,
976 timeout=600,
977 block=True,
978 data_type=DataAddType.HISTORICAL,
979 no_exception_on_chunk_error=False,
980 ):
981 query_info = self.query_dataset(dataset_id)
982 if DataSetType[query_info["type"]] != DataSetType.GLOBAL:
983 raise BoostedAPIException(
984 f"Incorrect dataset type: {query_info['type']}" f" - Expected {DataSetType.GLOBAL}"
985 )
986 warnings, errors = self.setup_chunk_and_upload_data(
987 dataset_id, csv_data, data_type, timeout, block, no_exception_on_chunk_error
988 )
989 if len(warnings) > 0:
990 logger.warning(
991 "Encountered {0} total warnings while uploading dataset.".format(len(warnings))
992 )
993 if len(errors) > 0:
994 raise BoostedAPIException(
995 "Encountered {0} total ERRORS while uploading dataset".format(len(errors))
996 + "\n".join(errors)
997 )
998 return {"warnings": warnings, "errors": errors}
999
1000 def get_csv_buffer(self):
1001 return io.StringIO()
1002
1003 def start_chunked_upload(self, dataset_id):
1004 url = self.base_uri + "/api/datasets/{0}/start-chunked-upload".format(dataset_id)
1005 headers = {"Authorization": "ApiKey " + self.api_key}
1006 res = requests.post(url, headers=headers, **self._request_params)
1007 if res.ok:
1008 return res.json()["result"]
1009 else:
1010 error_msg = self._try_extract_error_code(res)
1011 logger.error(error_msg)
1012 raise BoostedAPIException(
1013 "Failed to obtain dataset lock for upload: {0}.".format(error_msg)
1014 )
1015
1016 def abort_chunked_upload(self, dataset_id, chunk_id):
1017 url = self.base_uri + "/api/datasets/{0}/abort-chunked-upload".format(dataset_id)
1018 headers = {"Authorization": "ApiKey " + self.api_key}
1019 params = {"uploadGroupId": chunk_id}
1020 res = requests.post(url, headers=headers, **self._request_params, params=params)
1021 if not res.ok:
1022 error_msg = self._try_extract_error_code(res)
1023 logger.error(error_msg)
1024 raise BoostedAPIException(
1025 "Failed to abort dataset lock during error: {0}.".format(error_msg)
1026 )
1027
1028 def check_dataset_ingestion_completion(self, dataset_id, chunk_id, start_time):
1029 url = self.base_uri + "/api/datasets/{0}/upload-chunk-status".format(dataset_id)
1030 headers = {"Authorization": "ApiKey " + self.api_key}
1031 params = {"uploadGroupId": chunk_id}
1032 res = requests.get(url, headers=headers, **self._request_params, params=params)
1033 res = res.json()
1034
1035 finished = False
1036 warnings = []
1037 errors = []
1038
        if isinstance(res, dict):
1040 dataset_status = res["datasetStatus"]
1041 chunk_status = res["chunkStatus"]
1042 if chunk_status != ChunkStatus.PROCESSING.value:
1043 finished = True
1044 errors = res["errors"]
1045 warnings = res["warnings"]
1046 successful_rows = res["successfulRows"]
1047 total_rows = res["totalRows"]
1048 logger.info(
1049 f"Successfully ingested {successful_rows} out of {total_rows} uploaded rows."
1050 )
1051 if chunk_status in [
1052 ChunkStatus.SUCCESS.value,
1053 ChunkStatus.WARNING.value,
1054 ChunkStatus.ERROR.value,
1055 ]:
1056 if dataset_status != "AVAILABLE":
1057 raise BoostedAPIException(
1058 "Dataset was unexpectedly unavailable after chunk upload finished."
1059 )
1060 else:
1061 logger.info("Ingestion complete. Uploaded data is ready for use.")
1062 elif chunk_status == ChunkStatus.ABORTED.value:
1063 errors.append(
1064 "Dataset chunk upload was aborted by server! Upload did not succeed."
1065 )
1066 else:
1067 errors.append("Unexpected data ingestion status: {0}.".format(chunk_status))
1068 logger.info(
1069 "Data ingestion still running. Time elapsed={0}.".format(
1070 datetime.datetime.now() - start_time
1071 )
1072 )
1073 else:
1074 raise BoostedAPIException("Unable to get status of dataset ingestion.")
1075 return {"finished": finished, "warnings": warnings, "errors": errors}
1076
1077 def _commit_chunked_upload(self, dataset_id, chunk_id, data_type, block=True, timeout=600):
1078 url = self.base_uri + "/api/datasets/{0}/commit-chunked-upload".format(dataset_id)
1079 headers = {"Authorization": "ApiKey " + self.api_key}
1080 params = {
1081 "uploadGroupId": chunk_id,
1082 "dataAddType": data_type,
1083 "sendCompletionEmail": not block,
1084 }
1085 res = requests.post(url, headers=headers, **self._request_params, params=params)
1086 if not res.ok:
1087 error_msg = self._try_extract_error_code(res)
1088 logger.error(error_msg)
1089 raise BoostedAPIException("Failed to commit dataset files: {0}.".format(error_msg))
1090
1091 if block:
1092 start_time = datetime.datetime.now()
1093 # Keep waiting until upload is no longer in UPDATING state...
1094 while True:
1095 result = self.check_dataset_ingestion_completion(dataset_id, chunk_id, start_time)
1096 if result["finished"]:
1097 break
1098
1099 if (datetime.datetime.now() - start_time).total_seconds() > timeout:
1100 err_str = (
1101 f"Timeout waiting for commit of dataset: {dataset_id} | chunk: {chunk_id}"
1102 )
1103 logger.error(err_str)
1104 return [], [err_str]
1105
1106 time.sleep(10)
1107 return result["warnings"], result["errors"]
1108 else:
1109 return [], []
1110
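    # Upload flow used by the add_*_data methods: start_chunked_upload obtains
    # an upload lock (chunk_id), chunk_and_upload_data splits the data into
    # yearly (and, if oversized, row-count) chunks and posts each one, and
    # _commit_chunked_upload finalizes ingestion; on any exception the lock is
    # released via abort_chunked_upload.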
1111 def setup_chunk_and_upload_data(
1112 self,
1113 dataset_id,
1114 csv_data,
1115 data_type,
1116 timeout=600,
1117 block=True,
1118 no_exception_on_chunk_error=False,
1119 ):
1120 chunk_id = self.start_chunked_upload(dataset_id)
1121 logger.info("Obtained lock on dataset for upload: " + chunk_id)
1122 try:
1123 warnings, errors = self.chunk_and_upload_data(
1124 dataset_id, chunk_id, csv_data, timeout, no_exception_on_chunk_error
1125 )
1126 commit_warnings, commit_errors = self._commit_chunked_upload(
1127 dataset_id, chunk_id, data_type, block, timeout
1128 )
1129 return warnings + commit_warnings, errors + commit_errors
1130 except Exception:
1131 self.abort_chunked_upload(dataset_id, chunk_id)
1132 raise
1133
1134 def chunk_and_upload_data(
1135 self, dataset_id, chunk_id, csv_data, timeout=600, no_exception_on_chunk_error=False
1136 ):
1137 if isinstance(csv_data, pd.core.frame.DataFrame):
1138 if not isinstance(csv_data.index, pd.core.indexes.datetimes.DatetimeIndex):
1139 raise BoostedAPIException("DataFrame must have DatetimeIndex as index type.")
1140
1141 warnings = []
1142 errors = []
1143 logger.info("Uploading yearly.")
1144 for t in csv_data.index.to_period("Y").unique():
1145 if t is pd.NaT:
1146 continue
1147
                # serialize this year's slice to a CSV string
1149 buf = self.get_csv_buffer()
1150 yearly_csv = csv_data.loc[str(t)]
1151 yearly_csv.to_csv(buf, header=True)
1152 raw_csv = buf.getvalue()
1153
                # We are already chunking yearly, but if the CSV still exceeds a healthy
                # limit of 50 MB, the final line of defence is to ignore date boundaries
                # and just chunk the rows. This is mostly for the Cloudflare upload limit.
1157 size_lim = 50 * 1000 * 1000
1158 est_csv_size = sys.getsizeof(raw_csv)
1159 if est_csv_size > size_lim:
1160 del raw_csv, buf
1161 logger.info("Yearly data too large for single upload, chunking further...")
1162 chunks = []
1163 nchunks = math.ceil(est_csv_size / size_lim)
1164 rows_per_chunk = math.ceil(len(yearly_csv) / nchunks)
1165 for i in range(0, len(yearly_csv), rows_per_chunk):
1166 buf = self.get_csv_buffer()
1167 split_csv = yearly_csv.iloc[i : i + rows_per_chunk]
1168 split_csv.to_csv(buf, header=True)
1169 split_csv = buf.getvalue()
1170 chunks.append(
1171 (
1172 "{0}-{1}".format(i + 1, min(len(yearly_csv), i + rows_per_chunk)),
1173 split_csv,
1174 )
1175 )
1176 else:
1177 chunks = [("all", raw_csv)]
1178
1179 for i, (rows_descriptor, chunk_csv) in enumerate(chunks):
1180 chunk_descriptor = "{0} in yearly chunk {1}".format(rows_descriptor, t)
1181 logger.info(
1182 "Uploading rows:"
1183 + chunk_descriptor
1184 + " (chunk {0} of {1}):".format(i + 1, len(chunks))
1185 )
1186 _, new_warnings, new_errors = self.upload_dataset_chunk(
1187 chunk_descriptor,
1188 dataset_id,
1189 chunk_id,
1190 chunk_csv,
1191 timeout,
1192 no_exception_on_chunk_error,
1193 )
1194 warnings.extend(new_warnings)
1195 errors.extend(new_errors)
1196 return warnings, errors
1197
1198 elif isinstance(csv_data, str):
1199 _, warnings, errors = self.upload_dataset_chunk(
1200 "all data", dataset_id, chunk_id, csv_data, timeout, no_exception_on_chunk_error
1201 )
1202 return warnings, errors
1203 else:
1204 raise BoostedAPIException("Expected CSV as str or Pandas DataFrame.")
1205
1206 def upload_dataset_chunk(
1207 self,
1208 chunk_descriptor,
1209 dataset_id,
1210 chunk_id,
1211 csv_data,
1212 timeout=600,
1213 no_exception_on_chunk_error=False,
1214 ):
1215 logger.info("Starting upload: " + chunk_descriptor)
1216 url = self.base_uri + "/api/datasets/{0}/upload-dataset-chunk".format(dataset_id)
1217 headers = {"Authorization": "ApiKey " + self.api_key}
1218 files_req = {}
1219 warnings = []
1220 errors = []
1221
1222 # make the network request
1223 target = ("uploaded_data.csv", csv_data, "text/csv")
1224 files_req["dataFile"] = target
1225 params = {"uploadGroupId": chunk_id}
1226 res = requests.post(
1227 url,
1228 params=params,
1229 files=files_req,
1230 headers=headers,
1231 timeout=timeout,
1232 **self._request_params,
1233 )
1234
1235 if res.ok:
1236 logger.info(
1237 (
1238 "Chunk upload completed. "
1239 "Ingestion started. "
1240 "Please wait until the data is in AVAILABLE state."
1241 )
1242 )
1243 if "warnings" in res.json():
1244 warnings = res.json()["warnings"]
1245 if len(warnings) > 0:
1246 logger.warning("Uploaded chunk encountered data warnings: ")
1247 for w in warnings:
1248 logger.warning(w)
1249 else:
1250 reason = "Upload failed: {0}, {1}".format(res.text, res.reason)
1251 logger.error(reason)
1252 if no_exception_on_chunk_error:
1253 errors.append(
1254 "Chunk {0} failed: {1}. ".format(chunk_descriptor, reason)
1255 + "Your data was only PARTIALLY uploaded. "
1256 + "Please reattempt the upload of this chunk."
1257 )
1258 else:
1259 raise BoostedAPIException(reason)
1260
1261 return res, warnings, errors
1262
1263 def getAllocationsForDate(self, portfolio_id, date, rollback_to_last_available_date):
1264 date = self.__iso_format(date)
1265 endpoint = "latest-allocations" if rollback_to_last_available_date else "allocations"
1266 url = self.base_uri + "/api/portfolios/{0}/{1}".format(portfolio_id, endpoint)
1267 headers = {"Authorization": "ApiKey " + self.api_key}
1268 params = {"date": date}
1269 logger.info("Retrieving allocations information for date {0}.".format(date))
1270 res = requests.get(url, params=params, headers=headers, **self._request_params)
1271 if res.ok:
1272 logger.info("Allocations retrieval successful.")
1273 return res.json()
1274 else:
1275 error_msg = self._try_extract_error_code(res)
1276 raise BoostedAPIException("Failed to retrieve allocations: {0}.".format(error_msg))
1277
1278 # New API method for fetching data from portfolio_holdings.pb2 file.
1279 def getAllocationsForDateV2(self, portfolio_id, date, rollback_to_last_available_date):
1280 date = self.__iso_format(date)
1281 endpoint = "latest-allocations-v2" if rollback_to_last_available_date else "allocations-v2"
1282 url = self.base_uri + "/api/portfolios/{0}/{1}".format(portfolio_id, endpoint)
1283 headers = {"Authorization": "ApiKey " + self.api_key}
1284 params = {"date": date}
1285 logger.info("Retrieving allocations information for date {0}.".format(date))
1286 res = requests.get(url, params=params, headers=headers, **self._request_params)
1287 if res.ok:
1288 logger.info("Allocations retrieval successful.")
1289 return res.json()
1290 else:
1291 error_msg = self._try_extract_error_code(res)
1292 raise BoostedAPIException("Failed to retrieve allocations: {0}.".format(error_msg))
1293
1294 def getAllocationsByDates(self, portfolio_id, dates=None):
1295 url = self.base_uri + "/api/portfolios/{0}/allocationsByDate".format(portfolio_id)
1296 headers = {"Authorization": "ApiKey " + self.api_key}
1297 if dates is not None:
1298 fmt_dates = []
1299 for d in dates:
1300 fmt_dates.append(self.__iso_format(d))
1301 fmt_dates_str = ",".join(fmt_dates)
1302 params: Dict = {"dates": fmt_dates_str}
1303 logger.info("Retrieving allocations information for dates {0}.".format(fmt_dates))
1304 else:
1305 params = {"dates": None}
1306 logger.info("Retrieving allocations information for all dates")
1307 res = requests.get(url, params=params, headers=headers, **self._request_params)
1308 if res.ok:
1309 logger.info("Allocations retrieval successful.")
1310 return res.json()
1311 else:
1312 error_msg = self._try_extract_error_code(res)
1313 raise BoostedAPIException("Failed to retrieve allocations: {0}.".format(error_msg))
1314
1315 def getSignalsForDate(self, portfolio_id, date, rollback_to_last_available_date):
1316 date = self.__iso_format(date)
1317 endpoint = "latest-signals" if rollback_to_last_available_date else "signals"
1318 url = self.base_uri + "/api/portfolios/{0}/{1}".format(portfolio_id, endpoint)
1319 headers = {"Authorization": "ApiKey " + self.api_key}
1320 params = {"date": date}
1321 logger.info("Retrieving signals information for date {0}.".format(date))
1322 res = requests.get(url, params=params, headers=headers, **self._request_params)
1323 if res.ok:
1324 logger.info("Signals retrieval successful.")
1325 return res.json()
1326 else:
1327 error_msg = self._try_extract_error_code(res)
1328 raise BoostedAPIException("Failed to retrieve signals: {0}.".format(error_msg))
1329
1330 def getSignalsForAllDates(self, portfolio_id, dates=None):
1331 url = self.base_uri + "/api/portfolios/{0}/signalsByDate".format(portfolio_id)
1332 headers = {"Authorization": "ApiKey " + self.api_key}
1333 params = {}
1334 if dates is not None:
1335 fmt_dates = []
1336 for d in dates:
1337 fmt_dates.append(self.__iso_format(d))
1338 fmt_dates_str = ",".join(fmt_dates)
1339 params = {"dates": fmt_dates_str}
1340 logger.info("Retrieving signals information for dates {0}.".format(fmt_dates))
1341 else:
1342 params = {"dates": None}
1343 logger.info("Retrieving signals information for all dates")
1344 res = requests.get(url, params=params, headers=headers, **self._request_params)
1345 if res.ok:
1346 logger.info("Signals retrieval successful.")
1347 return res.json()
1348 else:
1349 error_msg = self._try_extract_error_code(res)
1350 raise BoostedAPIException("Failed to retrieve signals: {0}.".format(error_msg))
1351
1352 def getEquityAccuracy(
1353 self,
1354 model_id: str,
1355 portfolio_id: str,
1356 tickers: List[str],
1357 start_date: Optional[BoostedDate] = None,
1358 end_date: Optional[BoostedDate] = None,
1359 ) -> Dict[str, Dict[str, Any]]:
1360 data: Dict[str, Any] = {}
1361 if start_date is not None:
1362 start_date = convert_date(start_date)
1363 data["startDate"] = start_date.isoformat()
1364 if end_date is not None:
1365 end_date = convert_date(end_date)
1366 data["endDate"] = end_date.isoformat()
1367
1368 if start_date and end_date:
1369 validate_start_and_end_dates(start_date, end_date)
1370
1371 tickers_stream = ",".join(tickers)
1372 data["tickers"] = tickers_stream
1373 data["timestamp"] = time.strftime("%H:%M:%S")
1374 data["shouldRecalc"] = True
1375 url = self.base_uri + f"/api/analysis/equity-accuracy/{model_id}/{portfolio_id}"
1376 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
1377
1378 logger.info(
1379 f"Retrieving equity accuracy data for date range {start_date} to {end_date} "
1380 f"for tickers: {tickers}."
1381 )
1382
        # Metrics in the JSON response that are converted to DataFrames below.
1384 metrics = [
1385 "hit_rate_mean",
1386 "hit_rate_median",
1387 "excess_return_mean",
1388 "excess_return_median",
1389 "return",
1390 "excess_return",
1391 ]
1392
1393 # send the request, retry if failed
1394 MAX_RETRIES = 10 # max of number of retries until timeout
1395 SLEEP_TIME = 3 # waiting time between requests
1396
1397 num_retries = 0
1398 success = False
1399 while not success and num_retries < MAX_RETRIES:
1400 res = requests.post(url, data=json.dumps(data), headers=headers, **self._request_params)
1401 if res.ok:
1402 logger.info("Equity Accuracy Data retrieval successful.")
1403 info = res.json()
1404 success = True
1405 else:
1406 data["shouldRecalc"] = False
1407 num_retries += 1
1408 time.sleep(SLEEP_TIME)
1409
1410 if not success:
1411 raise BoostedAPIException("Failed to retrieve equity accuracy: Request timeout.")
1412
1413 for ticker, accuracy_data in info.items():
1414 for metric in metrics:
1415 metric_matrix = accuracy_data[metric]
1416 if not isinstance(metric_matrix, str):
1417 # Set the index to the quintile label, and remove it from the data
1418 index = []
1419 for row in metric_matrix[1:]:
1420 index.append(row.pop(0))
1421
1422 # columns are "1D", "5D", etc.
1423 df = pd.DataFrame(metric_matrix[1:], columns=metric_matrix[0][1:], index=index)
1424 accuracy_data[metric] = df
1425 return info
1426
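    # Illustrative call to getEquityAccuracy above (IDs and tickers are
    # placeholders). The result maps each ticker to a dict of metrics;
    # matrix-valued metrics such as "hit_rate_mean" are converted to DataFrames
    # with quintile labels as the index and horizons ("1D", "5D", ...) as
    # columns:
    #
    #   info = client.getEquityAccuracy("YOUR_MODEL_ID", "YOUR_PORTFOLIO_ID", ["AAPL", "MSFT"])
    #   hit_rate_df = info["AAPL"]["hit_rate_mean"]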
1427 def getHistoricalTradeDates(self, portfolio_id, start_date=None, end_date=None):
1428 end_date = self.__to_date_obj(end_date or datetime.date.today())
1429 start_date = self.__iso_format(start_date or (end_date - timedelta(days=365)))
1430 end_date = self.__iso_format(end_date)
1431
1432 url = self.base_uri + "/api/portfolios/{0}/tradingDates".format(portfolio_id)
1433 headers = {"Authorization": "ApiKey " + self.api_key}
1434 params = {"startDate": start_date, "endDate": end_date}
1435
1436 logger.info(
1437 "Retrieving historical trade dates data for date range {0} to {1}.".format(
1438 start_date, end_date
1439 )
1440 )
1441 res = requests.get(url, params=params, headers=headers, **self._request_params)
1442 if res.ok:
1443 logger.info("Trading dates retrieval successful.")
1444 return res.json()["dates"]
1445 else:
1446 error_msg = self._try_extract_error_code(res)
1447 raise BoostedAPIException("Failed to retrieve trading dates: {0}.".format(error_msg))
1448
1449 def getRankingsForAllDates(self, portfolio_id, dates=None):
1450 url = self.base_uri + "/api/portfolios/{0}/rankingsByDate".format(portfolio_id)
1451 headers = {"Authorization": "ApiKey " + self.api_key}
1452 params = {}
1453 if dates is not None:
1454 fmt_dates = []
1455 for d in dates:
1456 fmt_dates.append(self.__iso_format(d))
1457 fmt_dates_str = ",".join(fmt_dates)
1458 params = {"dates": fmt_dates_str}
1459 logger.info("Retrieving rankings information for date {0}.".format(fmt_dates_str))
1460 else:
1461 params = {"dates": None}
1462 logger.info("Retrieving rankings information for all dates")
1463 res = requests.get(url, params=params, headers=headers, **self._request_params)
1464 if res.ok:
1465 logger.info("Rankings retrieval successful.")
1466 return res.json()
1467 else:
1468 error_msg = self._try_extract_error_code(res)
1469 raise BoostedAPIException("Failed to retrieve rankings: {0}.".format(error_msg))
1470
1471 def getRankingsForDate(self, portfolio_id, date, rollback_to_last_available_date):
1472 date = self.__iso_format(date)
1473 endpoint = "latest-rankings" if rollback_to_last_available_date else "rankings"
1474 url = self.base_uri + "/api/{0}/{1}/{2}".format(endpoint, portfolio_id, date)
1475 headers = {"Authorization": "ApiKey " + self.api_key}
1476 logger.info("Retrieving rankings information for date {0}.".format(date))
1477 res = requests.get(url, headers=headers, **self._request_params)
1478 if res.ok:
1479 logger.info("Rankings retrieval successful.")
1480 return res.json()
1481 else:
1482 error_msg = self._try_extract_error_code(res)
1483 raise BoostedAPIException("Failed to retrieve rankings: {0}.".format(error_msg))
1484
1485 def sendModelRecalc(self, model_id):
1486 url = self.base_uri + "/api/models/{0}/recalc".format(model_id)
1487 logger.info("Sending model recalc request for model {0}".format(model_id))
1488 headers = {"Authorization": "ApiKey " + self.api_key}
1489 res = requests.put(url, headers=headers, **self._request_params)
1490 if not res.ok:
1491 error_msg = self._try_extract_error_code(res)
1492 logger.error(error_msg)
1493 raise BoostedAPIException(
1494 "Failed to send model recalc request - "
1495 + "the model in UI may be out of date: {0}.".format(error_msg)
1496 )
1497
1498 def sendRecalcAllModelPortfolios(self, model_id: str):
1499 """Recalculates all portfolios under a given model ID.
1500
1501 Args:
1502 model_id: the model ID
1503 Raises:
1504 BoostedAPIException: if the Boosted API request fails
1505 """
1506 url = self.base_uri + f"/api/models/{model_id}/recalc-all-portfolios"
1507 logger.info(f"Sending portfolio recalc requests for all portfolios under {model_id=}.")
1508 headers = {"Authorization": "ApiKey " + self.api_key}
1509 res = requests.put(url, headers=headers, **self._request_params)
1510 if not res.ok:
1511 error_msg = self._try_extract_error_code(res)
1512 logger.error(error_msg)
1513 raise BoostedAPIException(
1514 f"Failed to send recalc request for all portfolios under {model_id=} - {error_msg}."
1515 )
1516
1517 def sendPortfolioRecalc(self, portfolio_id: str):
1518 """Recalculates a single portfolio by its portfolio ID.
1519
1520 Args:
1521 portfolio_id: the portfolio ID to recalculate
1522 Raises:
1523 BoostedAPIException: if the Boosted API request fails
1524 """
1525 url = self.base_uri + "/api/graphql"
1526 logger.info(f"Sending portfolio recalc request for {portfolio_id=}.")
1527 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
1528 qry = """
1529 mutation recalcPortfolio($input: RecalculatePortfolioInput!) {
1530 recalculatePortfolio(input: $input) {
1531 success
1532 errors
1533 }
1534 }
1535 """
1536 req_json = {
1537 "query": qry,
1538 "variables": {"input": {"portfolioId": f"{portfolio_id}", "allowForceRecalc": "true"}},
1539 }
1540 res = requests.post(url, json=req_json, headers=headers, **self._request_params)
1541 if not res.ok or res.json().get("errors"):
1542 error_msg = self._try_extract_error_code(res)
1543 logger.error(error_msg)
1544 raise BoostedAPIException(
1545 f"Failed to send portfolio recalc request for {portfolio_id=} - {error_msg}."
1546 )
1547
1548 def add_uploaded_model_data(self, url, csv_data, request_data, timeout=600):
1549 logger.info("Starting upload.")
1550 headers = {"Authorization": "ApiKey " + self.api_key}
1551 files_req: Dict = {}
1552 target: Tuple[str, Any, str] = ("data.csv", None, "text/csv")
1553 warnings = []
1554 if isinstance(csv_data, pd.core.frame.DataFrame):
1555 buf = io.StringIO()
1556 csv_data.to_csv(buf, header=False)
1557 if not isinstance(csv_data.index, pd.core.indexes.datetimes.DatetimeIndex):
1558 raise BoostedAPIException("DataFrame must have DatetimeIndex as index type.")
1559 target = ("uploaded_data.csv", buf.getvalue(), "text/csv")
1560 files_req["dataFile"] = target
1561 res = requests.post(
1562 url,
1563 files=files_req,
1564 data=request_data,
1565 headers=headers,
1566 timeout=timeout,
1567 **self._request_params,
1568 )
1569 elif isinstance(csv_data, str):
1570 target = ("uploaded_data.csv", csv_data, "text/csv")
1571 files_req["dataFile"] = target
1572 res = requests.post(
1573 url,
1574 files=files_req,
1575 data=request_data,
1576 headers=headers,
1577 timeout=timeout,
1578 **self._request_params,
1579 )
1580 else:
1581 raise BoostedAPIException("Expected CSV as str or Pandas DataFrame.")
1582 if res.ok:
1583 logger.info("Signals upload completed.")
1584 result = res.json()["result"]
1585 if "warningMessages" in result:
1586 warnings = result["warningMessages"]
1587 else:
1588 error_str = "Signals upload failed: {0}, {1}".format(res.text, res.reason)
1589 logger.error(error_str)
1590 raise BoostedAPIException(error_str)
1591
1592 return res, warnings
1593
1594 def createSignalsModel(self, csv_data, model_name, timeout=600):
1595 warnings = []
1596 url = self.base_uri + "/api/models/upload/signals/create"
1597 request_data = {"modelName": model_name, "uploadName": model_name}
1598 res, warnings = self.add_uploaded_model_data(url, csv_data, request_data, timeout)
1599 result = res.json()["result"]
1600 model_id = result["modelId"]
1601 self.sendModelRecalc(model_id)
1602 return model_id, warnings
1603
1604 def addToUploadedModel(self, model_id, csv_data, timeout=600, recalc_model=True):
1605 warnings = []
1606 url = self.base_uri + "/api/models/{0}/upload/add-data".format(model_id)
1607 request_data: Dict = {}
1608 _, warnings = self.add_uploaded_model_data(url, csv_data, request_data, timeout)
1609 if recalc_model:
1610 self.sendModelRecalc(model_id)
1611 return warnings
1612
1613 def addSignalsToUploadedModel(
1614 self,
1615 model_id: str,
1616 csv_data: Union[pd.core.frame.DataFrame, str],
1617 timeout: int = 600,
1618 recalc_all: bool = False,
1619 recalc_portfolio_ids: Optional[List[str]] = None,
1620 ) -> List[str]:
1621 """
        Add signals to an uploaded model, then trigger a recalculation of the model,
        all of its portfolios, or a specific set of portfolios, depending on the
        recalc arguments.
1623
1624 Args:
1625 model_id: model ID
1626 csv_data: pandas DataFrame, or a string with signals to upload.
1627 timeout (optional): Timeout for initial upload request in seconds.
1628 recalc_all (optional): if True, recalculates all portfolios in the model.
            recalc_portfolio_ids (optional): List of portfolio IDs under the model to
                recalculate.
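
        Example:
            A minimal usage sketch; ``client`` is assumed to be an authenticated
            instance of this class, the IDs are placeholders, and the signal
            column layout below is an assumption that mirrors what
            ``getSignalsFromUploadedModel`` returns::

                import pandas as pd

                signals = pd.DataFrame(
                    {
                        "isin": ["US0000000000"],
                        "country": ["US"],
                        "currency": ["USD"],
                        "weight": [1.0],
                    },
                    index=pd.to_datetime(["2023-01-03"]),
                )
                warnings = client.addSignalsToUploadedModel(
                    model_id="MODEL_ID",
                    csv_data=signals,
                    recalc_portfolio_ids=["PORTFOLIO_ID"],
                )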
1630 """
1631 warnings = self.addToUploadedModel(model_id, csv_data, timeout, recalc_model=False)
1632
1633 if recalc_all:
1634 self.sendRecalcAllModelPortfolios(model_id)
1635 elif recalc_portfolio_ids:
1636 for portfolio_id in recalc_portfolio_ids:
1637 self.sendPortfolioRecalc(portfolio_id)
1638 else:
1639 self.sendModelRecalc(model_id)
1640 return warnings
1641
1642 def getSignalsFromUploadedModel(self, model_id, date=None):
1643 date = self.__iso_format(date)
1644 url = self.base_uri + "/api/models/{0}/upload/signals".format(model_id)
1645 headers = {"Authorization": "ApiKey " + self.api_key}
1646 params = {"date": date}
1647 logger.info("Retrieving uploaded signals information")
1648 res = requests.get(url, params=params, headers=headers, **self._request_params)
1649 if res.ok:
1650 result = pd.DataFrame.from_dict(res.json()["result"])
1651 # ensure column order
1652 result = result[["date", "isin", "country", "currency", "weight"]]
1653 result["date"] = pd.to_datetime(result["date"], format="%Y-%m-%d")
1654 result = result.set_index("date")
1655 logger.info("Signals retrieval successful.")
1656 return result
1657 else:
1658 error_msg = self._try_extract_error_code(res)
1659 raise BoostedAPIException("Failed to retrieve signals: {0}.".format(error_msg))
1660
1661 def getPortfolioSettings(self, portfolio_id, timeout=600):
1662 url = self.base_uri + "/api/portfolio-settings/{0}".format(portfolio_id)
1663 headers = {"Authorization": "ApiKey " + self.api_key}
1664 res = requests.get(url, headers=headers, **self._request_params)
1665 if res.ok:
1666 return PortfolioSettings(res.json())
1667 else:
1668 error_msg = self._try_extract_error_code(res)
1669 logger.error(error_msg)
1670 raise BoostedAPIException(
1671 "Failed to retrieve portfolio settings: {0}.".format(error_msg)
1672 )
1673
1674 def createPortfolioWithPortfolioSettings(
1675 self, model_id, portfolio_name, portfolio_description, portfolio_settings, timeout=600
1676 ):
1677 url = self.base_uri + "/api/models/{0}/constraints/add".format(model_id)
1678 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
1679 setting_string = json.dumps(portfolio_settings.settings)
1680 logger.info("Creating new portfolio with specified setting: {}".format(setting_string))
1681 params = {
1682 "name": portfolio_name,
1683 "description": portfolio_description,
1684 "settings": setting_string,
1685 "validate": "true",
1686 }
1687 res = requests.put(url, json=params, headers=headers, **self._request_params)
1688 response = res.json()
1689 if res.ok:
1690 return response
1691 else:
1692 error_msg = self._try_extract_error_code(res)
1693 logger.error(error_msg)
1694 raise BoostedAPIException(
1695 "Failed to create portfolio with the specified settings: {0}.".format(error_msg)
1696 )
1697
1698 def getGbiIdFromIdentCountryCurrencyDate(
1699 self, ident_country_currency_dates: List[DateIdentCountryCurrency], timeout: int = 600
1700 ) -> List[Optional[GbiIdSecurity]]:
1701 url = self.base_uri + "/api/custom-stock-data/map-identifiers-simple"
1702 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
1703 identifiers = [
1704 {
1705 "row": idx,
1706 "date": identifier.date,
1707 "isin": identifier.identifier if identifier.id_type == ColumnSubRole.ISIN else None,
1708 "symbol": (
1709 identifier.identifier if identifier.id_type == ColumnSubRole.SYMBOL else None
1710 ),
1711 "countryPreference": identifier.country,
1712 "currencyPreference": identifier.currency,
1713 }
1714 for idx, identifier in enumerate(ident_country_currency_dates)
1715 ]
1716 params = json.dumps({"identifiers": identifiers})
1717 logger.info(
1718 "Retrieving GBI-ID mapping for {} identifier tuples...".format(
1719 len(ident_country_currency_dates)
1720 )
1721 )
1722 res = requests.post(url, data=params, headers=headers, **self._request_params)
1723
1724 if res.ok:
1725 result = res.json()
1726 warnings = result["warnings"]
1727 if warnings:
1728 for warning in warnings:
1729 logger.warn(f"Mapping warning: {warning}")
1730 gbiSecurities = []
1731 for idx, ident in enumerate(result["mappedIdentifiers"]):
1732 if ident is None:
1733 security = None
1734 else:
1735 security = GbiIdSecurity(
1736 ident["gbiId"],
1737 ident_country_currency_dates[idx],
1738 ident["symbol"],
1739 ident["companyName"],
1740 )
1741 gbiSecurities.append(security)
1742
1743 return gbiSecurities
1744 else:
1745 error_msg = self._try_extract_error_code(res)
1746 raise BoostedAPIException(
1747 "Failed to retrieve identifier mappings: {0}.".format(error_msg)
1748 )
1749
1750 # exists for backwards compatibility purposes.
1751 def getGbiIdFromIsinCountryCurrencyDate(self, isin_country_currency_dates, timeout=600):
1752 return self.getGbiIdFromIdentCountryCurrencyDate(
1753 ident_country_currency_dates=isin_country_currency_dates, timeout=timeout
1754 )
1755
1756 # model_id: str
1757 # returns: Dict[str, str] representing the translation from the rankings ID (feature refs)
1758 # to human readable names
1759 def __get_rankings_ref_translation(self, model_id: str) -> Dict[str, str]:
1760 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
1761 feature_name_url = f"/api/models/{model_id}/advanced-explain/translate-feature-ref/"
1762 feature_name_res = requests.post(
1763 self.base_uri + feature_name_url,
1764 data=json.dumps({}),
1765 headers=headers,
1766 **self._request_params,
1767 )
1768
1769 if feature_name_res.ok:
1770 feature_name_dict = feature_name_res.json()
1771 return {
1772 id: "-".join(
1773 [names["variable_name"], names["transform_name"], names["normalization_name"]]
1774 )
1775 for id, names in feature_name_dict.items()
1776 }
1777 else:
1778 raise Exception(
1779 """Failed to get feature names for model,
1780 this model doesn't fully support rankings 2.0"""
1781 )
1782
1783 def getDatasetDates(self, dataset_id):
1784 url = self.base_uri + f"/api/datasets/{dataset_id}"
1785 headers = {"Authorization": "ApiKey " + self.api_key}
1786 res = requests.get(url, headers=headers, **self._request_params)
1787 if res.ok:
1788 dataset = res.json()
1789 valid_to_array = dataset.get("validTo")
1790 valid_to_date = None
1791 valid_from_array = dataset.get("validFrom")
1792 valid_from_date = None
1793 if valid_to_array:
1794 valid_to_date = datetime.date(
1795 valid_to_array[0], valid_to_array[1], valid_to_array[2]
1796 )
1797 if valid_from_array:
1798 valid_from_date = datetime.date(
1799 valid_from_array[0], valid_from_array[1], valid_from_array[2]
1800 )
1801 return {"validTo": valid_to_date, "validFrom": valid_from_date}
1802 else:
1803 error_msg = self._try_extract_error_code(res)
1804 logger.error(error_msg)
1805 raise BoostedAPIException("Failed to query dataset: {0}.".format(error_msg))
1806
1807 def getRankingAnalysis(self, model_id, date):
1808 url = (
1809 self.base_uri
1810 + f"/api/explain-trades/analysis/{model_id}/{self.__iso_format(date)}/json"
1811 )
1812 headers = {"Authorization": "ApiKey " + self.api_key}
1813 analysis_res = requests.get(url, headers=headers, **self._request_params)
1814 if analysis_res.ok:
1815 ranking_dict = analysis_res.json()
1816 feature_name_dict = self.__get_rankings_ref_translation(model_id)
1817 columns = [feature_name_dict[col] for col in ranking_dict["columns"]]
1818
1819 df = protoCubeJsonDataToDataFrame(
1820 ranking_dict["data"],
1821 "Data Buckets",
1822 ranking_dict["rows"],
1823 "Feature Names",
1824 columns,
1825 ranking_dict["fields"],
1826 )
1827 return df
1828 else:
1829 error_msg = self._try_extract_error_code(analysis_res)
1830 logger.error(error_msg)
1831 raise BoostedAPIException("Failed to get ranking analysis: {0}.".format(error_msg))
1832
1833 def getExplainForPortfolio(
1834 self,
1835 model_id,
1836 portfolio_id,
1837 date,
1838 index_by_symbol: bool = False,
1839 index_by_all_metadata: bool = False,
1840 ):
1841 """
1842 Gets the ranking 2.0 explain data for the given model on the given date
1843 filtered by portfolio.
1844
1845 Parameters
1846 ----------
1847 model_id: str
1848 Model ID. Model IDs can be retrieved by clicking on the copy to clipboard
1849 button next to your model's name in the Model Summary Page in Boosted
1850 Insights.
1851 portfolio_id: str
1852 Portfolio ID. Portfolio IDs can be retrieved from portfolio's configuration page.
1853 date: datetime.date or YYYY-MM-DD string
1854 Date of the data to retrieve.
1855 index_by_symbol: bool
1856 If true, index by stock symbol instead of ISIN.
1857 index_by_all_metadata: bool
1858 If true, index by all metadata: ISIN, stock symbol, currency, and country.
1859 Overrides index_by_symbol.
1860
1861 Returns
1862 -------
1863 pandas.DataFrame
1864 Pandas DataFrame containing your data indexed by ISINs/Symbol/all metadata
1865 and feature names, filtered by portfolio.
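
        Examples
        --------
        A minimal usage sketch; ``client`` is assumed to be an authenticated
        instance of this class and the IDs/date below are placeholders::

            explain_df = client.getExplainForPortfolio(
                model_id="MODEL_ID",
                portfolio_id="PORTFOLIO_ID",
                date="2023-01-03",
                index_by_symbol=True,
            )
            print(explain_df.head())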
1866 ___
1867 """
1868 indices = ["Symbol", "ISINs", "Country", "Currency"]
1869 raw_explain_df = self.getRankingExplain(
1870 model_id, date, index_by_symbol=False, index_by_all_metadata=True
1871 )
1872 pa_ratings_dict = self.getRankingsForDate(portfolio_id, date, False)
1873
1874 ratings = pa_ratings_dict["rankings"]
1875 ratings_df = pd.DataFrame(ratings)
1876 ratings_df = ratings_df[["symbol", "isin", "country", "currency"]]
1877 ratings_df.columns = pd.Index(indices)
1878 ratings_df.set_index(indices, inplace=True)
1879
1880 # inner join to only get the securities in both data frames
        result_df = raw_explain_df.merge(
            ratings_df, left_index=True, right_index=True, how="inner"
        )
1882
1883 # set index based on input parameters
1884 if index_by_symbol and not index_by_all_metadata:
1885 result_df = result_df.reset_index()
1886 result_df = result_df.drop(columns=["ISINs", "Currency", "Country"])
1887 result_df.set_index(["Symbol", "Feature Names"], inplace=True)
1888 elif not index_by_symbol and not index_by_all_metadata:
1889 result_df = result_df.reset_index()
1890 result_df = result_df.drop(columns=["Symbol", "Currency", "Country"])
1891 result_df.set_index(["ISINs", "Feature Names"], inplace=True)
1892
1893 return result_df
1894
1895 def getRankingExplain(
1896 self, model_id, date, index_by_symbol: bool = False, index_by_all_metadata: bool = False
1897 ):
1898 """
1899 Gets the ranking 2.0 explain data for the given model on the given date
1900
1901 Parameters
1902 ----------
1903 model_id: str
1904 Model ID. Model IDs can be retrieved by clicking on the copy to clipboard
1905 button next to your model's name in the Model Summary Page in Boosted
1906 Insights.
1907 date: datetime.date or YYYY-MM-DD string
1908 Date of the data to retrieve.
1909 index_by_symbol: bool
1910 If true, index by stock symbol instead of ISIN.
1911 index_by_all_metadata: bool
1912 If true, index by all metadata: ISIN, stock symbol, currency, and country.
1913 Overrides index_by_symbol.
1914
1915 Returns
1916 -------
1917 pandas.DataFrame
1918 Pandas DataFrame containing your data indexed by ISINs/Symbol/all metadata
1919 and feature names.
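
        Examples
        --------
        A minimal usage sketch; ``client`` is assumed to be an authenticated
        instance of this class and the ID/date below are placeholders::

            explain_df = client.getRankingExplain(
                model_id="MODEL_ID",
                date="2023-01-03",
                index_by_all_metadata=True,
            )
            # With index_by_all_metadata=True the index levels are:
            # ISINs, Symbol, Currency, Country, Feature Names.
            print(explain_df.index.names)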
1920 ___
1921 """
1922 url = (
1923 self.base_uri + f"/api/explain-trades/explain/{model_id}/{self.__iso_format(date)}/json"
1924 )
1925 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
1926 explain_res = requests.get(url, headers=headers, **self._request_params)
1927 if explain_res.ok:
1928 ranking_dict = explain_res.json()
1929 rows = ranking_dict["rows"]
1930 stock_summary_url = f"/api/stock-summaries/{model_id}"
1931 stock_summary_body = {"gbiIds": ranking_dict["rows"]}
1932 summary_res = requests.post(
1933 self.base_uri + stock_summary_url,
1934 data=json.dumps(stock_summary_body),
1935 headers=headers,
1936 **self._request_params,
1937 )
1938 if summary_res.ok:
1939 stock_summary = summary_res.json()
1940 if index_by_symbol:
1941 rows = [stock_summary[row]["symbol"] for row in ranking_dict["rows"]]
1942 elif index_by_all_metadata:
1943 rows = [
1944 [
1945 stock_summary[row]["isin"],
1946 stock_summary[row]["symbol"],
1947 stock_summary[row]["currency"],
1948 stock_summary[row]["country"],
1949 ]
1950 for row in ranking_dict["rows"]
1951 ]
1952 else:
1953 rows = [stock_summary[row]["isin"] for row in ranking_dict["rows"]]
1954 else:
1955 error_msg = self._try_extract_error_code(summary_res)
1956 logger.error(error_msg)
1957 raise BoostedAPIException(
1958 "Failed to get isin information ranking explain: {0}.".format(error_msg)
1959 )
1960
1961 feature_name_dict = self.__get_rankings_ref_translation(model_id)
1962 columns = [feature_name_dict[col] for col in ranking_dict["columns"]]
1963
1964 id_col_name = "Symbols" if index_by_symbol else "ISINs"
1965
1966 if index_by_all_metadata:
1967 pc_list = []
1968 pf = ranking_dict["data"]
1969 for row_idx, row in enumerate(rows):
1970 for col_idx, col in enumerate(columns):
1971 pc_list.append([row, col] + pf[row_idx]["columns"][col_idx]["fields"])
1972 df = pd.DataFrame(pc_list)
1973 df = df.set_axis(
1974 ["Metadata", "Feature Names"] + ranking_dict["fields"], axis="columns"
1975 )
1976
1977 metadata_df = df["Metadata"].apply(pd.Series)
1978 metadata_df.columns = pd.Index(["ISINs", "Symbol", "Currency", "Country"])
1979 result_df = pd.concat([metadata_df, df], axis=1).drop("Metadata", axis=1)
1980 result_df.set_index(
1981 ["ISINs", "Symbol", "Currency", "Country", "Feature Names"], inplace=True
1982 )
1983 return result_df
1984
1985 else:
1986 df = protoCubeJsonDataToDataFrame(
1987 ranking_dict["data"],
1988 id_col_name,
1989 rows,
1990 "Feature Names",
1991 columns,
1992 ranking_dict["fields"],
1993 )
1994
1995 return df
1996 else:
1997 error_msg = self._try_extract_error_code(explain_res)
1998 logger.error(error_msg)
1999 raise BoostedAPIException("Failed to get ranking explain: {0}.".format(error_msg))
2000
2001 def getDenseSignalsForDate(self, portfolio_id, date, rollback_to_last_available_date):
2002 date = self.__iso_format(date)
2003 url = self.base_uri + f"/api/portfolios/{portfolio_id}/denseSignalsByDate"
2004 headers = {"Authorization": "ApiKey " + self.api_key}
2005 params = {
2006 "startDate": date,
2007 "endDate": date,
2008 "rollbackToMostRecentDate": rollback_to_last_available_date,
2009 }
2010 logger.info("Retrieving dense signals information for date {0}.".format(date))
2011 res = requests.get(url, params=params, headers=headers, **self._request_params)
2012 if res.ok:
2013 logger.info("Signals retrieval successful.")
2014 d = res.json()
2015 # reshape date to output format
2016 date = list(d["signals"].keys())[0]
2017 model_id = d["model_id"]
2018 signals_list = list(d["signals"].values())[0]
2019 return {"date": date, "signals": [{"model_id": model_id, "signals_info": signals_list}]}
2020 else:
2021 error_msg = self._try_extract_error_code(res)
2022 raise BoostedAPIException("Failed to retrieve dense signals: {0}.".format(error_msg))
2023
2024 def getDenseSignals(self, model_id, portfolio_id, file_name=None, location="./"):
2025 url = self.base_uri + f"/api/models/{model_id}/{portfolio_id}/dense-signals"
2026 headers = {"Authorization": "ApiKey " + self.api_key}
2027 res = requests.get(url, headers=headers, **self._request_params)
2028 if file_name is None:
2029 file_name = f"{model_id}-{portfolio_id}_dense_signals.csv"
2030 download_location = os.path.join(location, file_name)
2031 if res.ok:
2032 with open(download_location, "wb") as file:
2033 file.write(res.content)
2034 print("Download Complete")
2035 elif res.status_code == 404:
2036 raise BoostedAPIException(
2037 f"""Dense Singals file does not exist for model:
2038 {model_id} - portfolio: {portfolio_id}"""
2039 )
2040 else:
2041 error_msg = self._try_extract_error_code(res)
2042 logger.error(error_msg)
2043 raise BoostedAPIException(
2044 f"""Failed to download dense singals file for model:
2045 {model_id} - portfolio: {portfolio_id}"""
2046 )
2047
2048 def _getIsPortfolioReadyForProcessing(self, model_id, portfolio_id, formatted_date):
2049 headers = {"Authorization": "ApiKey " + self.api_key}
2050 url = (
2051 self.base_uri
2052 + f"/api/explain-trades/{model_id}/{portfolio_id}"
2053 + f"/is-ready-for-processing/{formatted_date}"
2054 )
2055 res = requests.get(url, headers=headers, **self._request_params)
2056
2057 try:
2058 if res.ok:
2059 body = res.json()
2060 if "ready" in body:
2061 if body["ready"]:
2062 return True, ""
2063 else:
2064 reason_from_api = (
2065 body["notReadyReason"] if "notReadyReason" in body else "Unavailable"
2066 )
2067
2068 returned_reason = reason_from_api
2069
2070 if returned_reason == "SKIP":
2071 returned_reason = "holiday- market closed"
2072
2073 if returned_reason == "WAITING":
2074 returned_reason = "calculations pending"
2075
2076 return False, returned_reason
2077 else:
2078 return False, "Unavailable"
2079 else:
2080 error_msg = self._try_extract_error_code(res)
2081 logger.error(error_msg)
2082 raise BoostedAPIException(
2083 f"""Failed to generate file for model:
2084 {model_id} - portfolio: {portfolio_id} on date: {formatted_date}"""
2085 )
2086 except Exception as e:
2087 raise BoostedAPIException(
2088 f"""Failed to generate file for model:
2089 {model_id} - portfolio: {portfolio_id} on date: {formatted_date} {e}"""
2090 )
2091
2092 def getRanking2DateAnalysisFile(
2093 self, model_id, portfolio_id, date, file_name=None, location="./"
2094 ):
2095 formatted_date = self.__iso_format(date)
2096 s3_file_name = f"{formatted_date}_analysis.xlsx"
2097 download_url = (
2098 self.base_uri + f"/api/models/{model_id}/{portfolio_id}/ranking-file/{s3_file_name}"
2099 )
2100 headers = {"Authorization": "ApiKey " + self.api_key}
2101 if file_name is None:
2102 file_name = f"{model_id}-{portfolio_id}_statistical_analysis_{formatted_date}.xlsx"
2103 download_location = os.path.join(location, file_name)
2104
2105 res = requests.get(download_url, headers=headers, **self._request_params)
2106 if res.ok:
2107 with open(download_location, "wb") as file:
2108 file.write(res.content)
2109 print("Download Complete")
2110 elif res.status_code == 404:
2111 (
2112 is_portfolio_ready_for_processing,
2113 portfolio_ready_status,
2114 ) = self._getIsPortfolioReadyForProcessing(model_id, portfolio_id, formatted_date)
2115
2116 if not is_portfolio_ready_for_processing:
2117 logger.info(
2118 f"""\nPortfolio {portfolio_id} for model {model_id}
2119 on date {date} unavailable for Ranking2Date Analysis file.
2120 Status: {portfolio_ready_status}\n"""
2121 )
2122 return
2123
2124 generate_url = (
2125 self.base_uri
2126 + f"/api/explain-trades/{model_id}/{portfolio_id}"
2127 + f"/generate/date-data/{formatted_date}"
2128 )
2129
2130 generate_res = requests.get(generate_url, headers=headers, **self._request_params)
2131 if generate_res.ok:
2132 download_res = requests.get(download_url, headers=headers, **self._request_params)
2133 while download_res.status_code == 404 or (
2134 download_res.ok and len(download_res.content) == 0
2135 ):
2136 print("waiting for file to be generated")
2137 time.sleep(5)
2138 download_res = requests.get(
2139 download_url, headers=headers, **self._request_params
2140 )
2141 if download_res.ok:
2142 with open(download_location, "wb") as file:
2143 file.write(download_res.content)
2144 print("Download Complete")
2145 else:
                error_msg = self._try_extract_error_code(generate_res)
2147 logger.error(error_msg)
2148 raise BoostedAPIException(
2149 f"""Failed to generate ranking analysis file for model:
2150 {model_id} - portfolio: {portfolio_id} on date: {formatted_date}"""
2151 )
2152 else:
2153 error_msg = self._try_extract_error_code(res)
2154 logger.error(error_msg)
2155 raise BoostedAPIException(
2156 f"""Failed to download ranking analysis file for model:
2157 {model_id} - portfolio: {portfolio_id} on date: {formatted_date}"""
2158 )
2159
2160 def getRanking2DateExplainFile(
2161 self,
2162 model_id,
2163 portfolio_id,
2164 date,
2165 file_name=None,
2166 location="./",
2167 overwrite: bool = False,
2168 index_by_all_metadata: bool = False,
2169 ):
2170 """
        Downloads the ranking explain file for the provided portfolio and model.
        If no file exists yet, a generation request is sent and the server is polled
        every 5 seconds until the file is available to download.
2174
2175 Parameters
2176 ----------
2177 model_id: str
2178 Model ID. Model IDs can be retrieved by clicking on the copy to clipboard
2179 button next to your model's name in the Model Summary Page in Boosted
2180 Insights.
2181 portfolio_id: str
2182 Portfolio ID. Portfolio IDs can be retrieved from portfolio's configuration page.
2183 date: datetime.date or YYYY-MM-DD string
2184 Date of the data to retrieve.
2185 file_name: str
            File name of the explain data file to save as.
2187 If no file name is given the file name will be
2188 "<model_id>-<portfolio_id>_explain_data_<date>.xlsx"
2189 location: str
2190 The location to save the file to.
2191 If no location is given then it will be saved to the current directory.
2192 overwrite: bool
2193 Defaults to False, set to True to regenerate the file.
2194 index_by_all_metadata: bool
2195 If true, index by all metadata: ISIN, stock symbol, currency, and country.
2196
2197
2198 Returns
2199 -------
2200 None
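
        Examples
        --------
        A minimal usage sketch; ``client`` is assumed to be an authenticated
        instance of this class, the IDs/date below are placeholders, and the
        target directory is assumed to exist::

            client.getRanking2DateExplainFile(
                model_id="MODEL_ID",
                portfolio_id="PORTFOLIO_ID",
                date="2023-01-03",
                location="./",
                overwrite=False,
            )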
2201 ___
2202 """
2203 formatted_date = self.__iso_format(date)
2204 if index_by_all_metadata:
2205 s3_file_name = f"{formatted_date}_explaindata_withmetadata.xlsx"
2206 else:
2207 s3_file_name = f"{formatted_date}_explaindata.xlsx"
2208 download_url = (
2209 self.base_uri + f"/api/models/{model_id}/{portfolio_id}/ranking-file/{s3_file_name}"
2210 )
2211 headers = {"Authorization": "ApiKey " + self.api_key}
2212 if file_name is None:
2213 file_name = f"{model_id}-{portfolio_id}_explain_data_{formatted_date}.xlsx"
2214 download_location = os.path.join(location, file_name)
2215
2216 if not overwrite:
2217 res = requests.get(download_url, headers=headers, **self._request_params)
2218 if not overwrite and res.ok:
2219 with open(download_location, "wb") as file:
2220 file.write(res.content)
2221 print("Download Complete")
2222 elif overwrite or res.status_code == 404:
2223 (
2224 is_portfolio_ready_for_processing,
2225 portfolio_ready_status,
2226 ) = self._getIsPortfolioReadyForProcessing(model_id, portfolio_id, formatted_date)
2227
2228 if not is_portfolio_ready_for_processing:
2229 logger.info(
2230 f"""\nPortfolio {portfolio_id} for model {model_id}
2231 on date {date} unavailable for Ranking2Date Explain file.
2232 Status: {portfolio_ready_status}\n"""
2233 )
2234 return
2235
2236 generate_url = (
2237 self.base_uri
2238 + f"/api/explain-trades/{model_id}/{portfolio_id}"
2239 + f"/generate/date-data/{formatted_date}"
2240 + f"/{'true' if index_by_all_metadata else 'false'}"
2241 )
2242
2243 generate_res = requests.get(generate_url, headers=headers, **self._request_params)
2244 if generate_res.ok:
2245 download_res = requests.get(download_url, headers=headers, **self._request_params)
2246 while download_res.status_code == 404 or (
2247 download_res.ok and len(download_res.content) == 0
2248 ):
2249 print("waiting for file to be generated")
2250 time.sleep(5)
2251 download_res = requests.get(
2252 download_url, headers=headers, **self._request_params
2253 )
2254 if download_res.ok:
2255 with open(download_location, "wb") as file:
2256 file.write(download_res.content)
2257 print("Download Complete")
2258 else:
                error_msg = self._try_extract_error_code(generate_res)
2260 logger.error(error_msg)
2261 raise BoostedAPIException(
2262 f"""Failed to generate ranking explain file for model:
2263 {model_id} - portfolio: {portfolio_id} on date: {formatted_date}"""
2264 )
2265 else:
2266 error_msg = self._try_extract_error_code(res)
2267 logger.error(error_msg)
2268 raise BoostedAPIException(
2269 f"""Failed to download ranking explain file for model:
2270 {model_id} - portfolio: {portfolio_id} on date: {formatted_date}"""
2271 )
2272
2273 def getRanking2DateExplain(
2274 self,
2275 model_id: str,
2276 portfolio_id: str,
2277 date: Optional[datetime.date],
2278 overwrite: bool = False,
2279 ) -> Dict[str, pd.DataFrame]:
2280 """
        Wrapper around getRanking2DateExplainFile that loads the generated file and
        returns a dictionary of pandas DataFrames (one per sheet) instead of
        downloading to a path. Each DataFrame is indexed by symbol and should always
        have 'rating' and 'rating_delta' columns; the remaining columns are
        determined by the model's features.
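
        A minimal usage sketch; ``client`` is assumed to be an authenticated
        instance of this class and the IDs/date below are placeholders::

            import datetime

            sheets = client.getRanking2DateExplain(
                model_id="MODEL_ID",
                portfolio_id="PORTFOLIO_ID",
                date=datetime.date(2023, 1, 3),
            )
            for sheet_name, df in sheets.items():
                print(sheet_name, df.shape)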
2285 """
2286 file_name = "explaindata.xlsx"
2287 with tempfile.TemporaryDirectory() as tmpdirname:
2288 self.getRanking2DateExplainFile(
2289 model_id=model_id,
2290 portfolio_id=portfolio_id,
2291 date=date,
2292 file_name=file_name,
2293 location=tmpdirname,
2294 overwrite=overwrite,
2295 )
2296 full_path = os.path.join(tmpdirname, file_name)
2297 excel_file = pd.ExcelFile(full_path)
2298 df_map = pd.read_excel(excel_file, sheet_name=None)
2299 df_map_final = {str(sheet): df.set_index("Symbol") for (sheet, df) in df_map.items()}
2300
2301 return df_map_final
2302
2303 def getTearSheet(self, model_id, portfolio_id, start_date=None, end_date=None, block=False):
2304 if start_date is None or end_date is None:
2305 if start_date is not None or end_date is not None:
2306 raise ValueError("start_date and end_date must both be None or both be defined")
2307 return self._getCurrentTearSheet(model_id, portfolio_id)
2308
2309 start_date_obj = self.__to_date_obj(start_date)
2310 end_date_obj = self.__to_date_obj(end_date)
2311 if start_date_obj >= end_date_obj:
2312 raise ValueError("end_date must be later than the start_date")
2313
2314 # get for the given date
2315 url = self.base_uri + f"/api/analysis/keyfacts/{model_id}/{portfolio_id}"
2316 data = {
2317 "startDate": self.__iso_format(start_date),
2318 "endDate": self.__iso_format(end_date),
2319 "shouldRecalc": True,
2320 }
2321 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
2322 res = requests.post(url, data=json.dumps(data), headers=headers, **self._request_params)
2323 if res.status_code == 404 and block:
2324 retries = 0
2325 data["shouldRecalc"] = False
2326 while retries < 10:
2327 time.sleep(10)
2328 retries += 1
2329 res = requests.post(
2330 url, data=json.dumps(data), headers=headers, **self._request_params
2331 )
2332 if res.status_code != 404:
2333 break
2334 if res.ok:
2335 return res.json()
2336 else:
2337 error_msg = self._try_extract_error_code(res)
2338 logger.error(error_msg)
2339 raise BoostedAPIException(
2340 "Failed to get tear sheet data: {0} {1}.".format(error_msg, str(res.status_code))
2341 )
2342
2343 def _getCurrentTearSheet(self, model_id, portfolio_id):
2344 url = self.base_uri + f"/api/model-summaries/{model_id}/{portfolio_id}"
2345 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
2346 res = requests.get(url, headers=headers, **self._request_params)
        if res.ok:
            # Avoid shadowing the imported ``json`` module.
            response_json = res.json()
            return response_json.get("tearSheet", {})
2350 else:
2351 error_msg = self._try_extract_error_code(res)
2352 logger.error(error_msg)
2353 raise BoostedAPIException("Failed to get tear sheet data: {0}.".format(error_msg))
2354
2355 def getPortfolioStatus(self, model_id, portfolio_id, job_date):
2356 url = (
2357 self.base_uri
2358 + f"/api/analysis/portfolioStatus/{model_id}/{portfolio_id}?jobDate={job_date}"
2359 )
2360 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
2361 res = requests.get(url, headers=headers, **self._request_params)
2362 if res.ok:
2363 result = res.json()
2364 return {
2365 "is_complete": result["status"],
2366 "last_update": None if result["lastUpdate"] is None else result["lastUpdate"][:10],
2367 "next_update": None if result["nextUpdate"] is None else result["nextUpdate"][:10],
2368 }
2369 else:
2370 error_msg = self._try_extract_error_code(res)
2371 logger.error(error_msg)
2372 raise BoostedAPIException("Failed to get portfolio status: {0}".format(error_msg))
2373
2374 def _query_portfolio_factor_attribution(
2375 self,
2376 portfolio_id: str,
2377 start_date: Optional[BoostedDate] = None,
2378 end_date: Optional[BoostedDate] = None,
2379 ):
2380 response = self._get_graphql(
2381 query=graphql_queries.GET_PORTFOLIO_FACTOR_ATTRIBUTION_QUERY,
2382 variables={
2383 "portfolioId": portfolio_id,
2384 "startDate": str(start_date) if start_date else None,
2385 "endDate": str(end_date) if start_date else None,
2386 },
2387 error_msg_prefix="Failed to get factor attribution: ",
2388 )
2389 return response
2390
2391 def get_portfolio_factor_attribution(
2392 self,
2393 portfolio_id: str,
2394 start_date: Optional[BoostedDate] = None,
2395 end_date: Optional[BoostedDate] = None,
2396 ):
2397 """Get portfolio factor attribution for a portfolio
2398
2399 Args:
2400 portfolio_id (str): a valid UUID string
2401 start_date (BoostedDate, optional): The start date. Defaults to None.
2402 end_date (BoostedDate, optional): The end date. Defaults to None.
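
        Example:
            A minimal usage sketch; ``client`` is assumed to be an authenticated
            instance of this class, the portfolio ID is a placeholder, and passing
            ISO-format date strings is an assumption. The returned DataFrame has one
            `*_return` and one `*_beta` column per factor plus a `total_return`
            column::

                attribution_df = client.get_portfolio_factor_attribution(
                    portfolio_id="PORTFOLIO_ID",
                    start_date="2023-01-01",
                    end_date="2023-06-30",
                )
                print(attribution_df.columns.tolist())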
2403 """
2404 response = self._query_portfolio_factor_attribution(portfolio_id, start_date, end_date)
2405 factor_attribution = response["data"]["portfolio"]["factorAttribution"]
2406 dates = pd.DatetimeIndex(data=factor_attribution["dates"])
2407 beta = factor_attribution["factorBetas"]
2408 beta_df = pd.DataFrame(index=dates, data={x["name"]: x["data"] for x in beta})
2409 beta_df = beta_df.add_suffix("_beta")
2410 returns = factor_attribution["portfolioFactorPerformance"]
2411 returns_df = pd.DataFrame(index=dates, data={x["name"]: x["data"] for x in returns})
2412 returns_df = returns_df.add_suffix("_return")
2413 returns_df = (returns_df - 1) * 100
2414
2415 final_df = pd.concat([returns_df, beta_df], axis=1)
2416 ordered_columns = list(itertools.chain(*zip(returns_df.columns, beta_df.columns)))
2417 ordered_final_df = final_df.reindex(columns=ordered_columns)
2418
        # Add a `total_return` column: the row-wise sum of the factor return columns
2420 ordered_final_df["total_return"] = returns_df.sum(axis=1)
2421 return ordered_final_df
2422
2423 def getBlacklist(self, blacklist_id):
2424 url = self.base_uri + f"/api/blacklist/{blacklist_id}"
2425 headers = {"Authorization": "ApiKey " + self.api_key}
2426 res = requests.get(url, headers=headers, **self._request_params)
2427 if res.ok:
2428 result = res.json()
2429 return result
2430 error_msg = self._try_extract_error_code(res)
2431 logger.error(error_msg)
2432 raise BoostedAPIException(f"Failed to get blacklist with id {blacklist_id}: {error_msg}")
2433
2434 def getBlacklists(self, model_id=None, company_id=None, last_N=None):
2435 params = {}
2436 if last_N:
2437 params["lastN"] = last_N
2438 if model_id:
2439 params["modelId"] = model_id
2440 if company_id:
2441 params["companyId"] = company_id
2442 url = self.base_uri + f"/api/blacklist"
2443 headers = {"Authorization": "ApiKey " + self.api_key}
2444 res = requests.get(url, headers=headers, params=params, **self._request_params)
2445 if res.ok:
2446 result = res.json()
2447 return result
2448 error_msg = self._try_extract_error_code(res)
2449 logger.error(error_msg)
2450 raise BoostedAPIException(
2451 f"""Failed to get blacklists with \
2452 model_id {model_id} company_id {company_id} last_N {last_N}: {error_msg}"""
2453 )
2454
    def createBlacklist(
        self,
        isin,
        long_short=2,
        start_date=None,
        end_date="4000-01-01",
        model_id=None,
    ):
        # Evaluate the default start date at call time rather than at import time.
        if start_date is None:
            start_date = datetime.date.today()
        url = self.base_uri + "/api/blacklist"
2464 data = {
2465 "modelId": model_id,
2466 "isin": isin,
2467 "longShort": long_short,
2468 "startDate": self.__iso_format(start_date),
2469 "endDate": self.__iso_format(end_date),
2470 }
2471 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
2472 res = requests.post(url, data=json.dumps(data), headers=headers, **self._request_params)
2473 if res.ok:
2474 return res.json()
2475 else:
2476 error_msg = self._try_extract_error_code(res)
2477 logger.error(error_msg)
2478 raise BoostedAPIException(
2479 f"""Failed to create the blacklist with \
2480 isin {isin} long_short {long_short} start_date {start_date} end_date {end_date} \
2481 model_id {model_id}: {error_msg}."""
2482 )
2483
2484 def createBlacklistsFromCSV(self, csv_name):
2485 url = self.base_uri + f"/api/blacklists"
2486 data = []
2487 with open(csv_name, mode="r") as f:
2488 csv_reader = csv.DictReader(f)
2489 for row in csv_reader:
2490 blacklist = {"modelId": row["ModelID"], "isin": row["ISIN"]}
2491 if not row.get("LongShort"):
2492 blacklist["longShort"] = 2
2493 else:
2494 blacklist["longShort"] = row["LongShort"]
2495
2496 if not row.get("StartDate"):
2497 blacklist["startDate"] = self.__iso_format(datetime.date.today())
2498 else:
2499 blacklist["startDate"] = self.__iso_format(row["StartDate"])
2500
2501 if not row.get("EndDate"):
2502 blacklist["endDate"] = self.__iso_format("4000-01-01")
2503 else:
2504 blacklist["endDate"] = self.__iso_format(row["EndDate"])
2505 data.append(blacklist)
2506 print(f"Processed {len(data)} blacklists.")
2507 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
2508 res = requests.post(url, data=json.dumps(data), headers=headers, **self._request_params)
2509 if res.ok:
2510 return res.json()
2511 else:
2512 error_msg = self._try_extract_error_code(res)
2513 logger.error(error_msg)
2514 raise BoostedAPIException("failed to create blacklists")
2515
2516 def updateBlacklist(self, blacklist_id, long_short=None, start_date=None, end_date=None):
2517 params = {}
2518 if long_short:
2519 params["longShort"] = long_short
2520 if start_date:
2521 params["startDate"] = start_date
2522 if end_date:
2523 params["endDate"] = end_date
2524 url = self.base_uri + f"/api/blacklist/{blacklist_id}"
2525 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
2526 res = requests.patch(url, json=params, headers=headers, **self._request_params)
2527 if res.ok:
2528 return res.json()
2529 else:
2530 error_msg = self._try_extract_error_code(res)
2531 logger.error(error_msg)
2532 raise BoostedAPIException(
2533 f"Failed to update blacklist with id {blacklist_id}: {error_msg}"
2534 )
2535
2536 def deleteBlacklist(self, blacklist_id):
2537 url = self.base_uri + f"/api/blacklist/{blacklist_id}"
2538 headers = {"Authorization": "ApiKey " + self.api_key}
2539 res = requests.delete(url, headers=headers, **self._request_params)
2540 if res.ok:
2541 result = res.json()
2542 return result
2543 else:
2544 error_msg = self._try_extract_error_code(res)
2545 logger.error(error_msg)
2546 raise BoostedAPIException(
2547 f"Failed to delete blacklist with id {blacklist_id}: {error_msg}"
2548 )
2549
2550 def getFeatureImportance(self, model_id, date, N=None):
2551 url = self.base_uri + f"/api/analysis/explainability/{model_id}"
2552 headers = {"Authorization": "ApiKey " + self.api_key}
2553 logger.info("Retrieving rankings information for date {0}.".format(date))
2554 res = requests.get(url, headers=headers, **self._request_params)
2555 if not res.ok:
2556 error_msg = self._try_extract_error_code(res)
2557 logger.error(error_msg)
2558 raise BoostedAPIException(
2559 f"Failed to fetch feature importance for model/portfolio {model_id}: {error_msg}"
2560 )
2561
2562 json_data = res.json()
2563 if "all" not in json_data.keys() or not json_data["all"]:
2564 raise BoostedAPIException(f"Unexpected formatting of feature importance response")
2565
2566 feature_data = json_data["all"]
2567 # find the right period (assuming returned json has dates in descending order)
2568 date_obj = self.__to_date_obj(date)
2569 start_date_for_return_data = self.__to_date_obj(feature_data[0]["date"])
2570 features_for_requested_period = None
2571
2572 if date_obj > start_date_for_return_data:
2573 features_for_requested_period = feature_data[0]["variable"]
2574 else:
2575 i = 0
2576 while i < len(feature_data) - 1:
2577 current_date = self.__to_date_obj(feature_data[i]["date"])
2578 next_date = self.__to_date_obj(feature_data[i + 1]["date"])
2579 if next_date <= date_obj <= current_date:
2580 features_for_requested_period = feature_data[i + 1]["variable"]
2581 start_date_for_return_data = next_date
2582 break
2583 i += 1
2584
2585 if features_for_requested_period is None:
2586 raise BoostedAPIException(f"No feature data was found for requested date: {date_obj}")
2587
2588 features_for_requested_period.sort(key=lambda x: x["value"], reverse=True)
2589
        if isinstance(N, int) and N > 0:
2591 df = pd.DataFrame.from_dict(features_for_requested_period[0:N])
2592 else:
2593 df = pd.DataFrame.from_dict(features_for_requested_period)
2594 result = df[["feature", "value"]]
2595
2596 return result.rename(columns={"feature": f"feature ({start_date_for_return_data})"})
2597
2598 def getAllModelNames(self) -> Dict[str, str]:
2599 url = f"{self.base_uri}/api/graphql"
2600 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
2601 req_json = {"query": "query listOfModels {\n models { id name }}", "variables": {}}
2602 res = requests.post(url, json=req_json, headers=headers, **self._request_params)
2603 if not res.ok:
2604 error_msg = self._try_extract_error_code(res)
2605 logger.error(error_msg)
2606 raise BoostedAPIException(f"Failed to get user models: {error_msg}")
2607 data = res.json()
2608 if data["data"]["models"] is None:
2609 return {}
2610 return {rec["id"]: rec["name"] for rec in data["data"]["models"]}
2611
2612 def getAllModelDetails(self) -> Dict[str, Dict[str, Any]]:
2613 url = f"{self.base_uri}/api/graphql"
2614 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
2615 req_json = {
2616 "query": "query listOfModels {\n models { id name lastUpdated portfolios { id name }}}",
2617 "variables": {},
2618 }
2619 res = requests.post(url, json=req_json, headers=headers, **self._request_params)
2620 if not res.ok:
2621 error_msg = self._try_extract_error_code(res)
2622 logger.error(error_msg)
2623 raise BoostedAPIException(f"Failed to get user models: {error_msg}")
2624 data = res.json()
2625 if data["data"]["models"] is None:
2626 return {}
2627
2628 output_data = {}
2629 for rec in data["data"]["models"]:
2630 model_id = rec["id"]
2631 output_data[model_id] = {
2632 "name": rec["name"],
2633 "last_updated": parser.parse(rec["lastUpdated"]),
2634 "portfolios": rec["portfolios"],
2635 }
2636
2637 return output_data
2638
2639 def get_hedge_experiments(self):
2640 url = self.base_uri + "/api/graphql"
2641 qry = """
2642 query getHedgeExperiments {
2643 hedgeExperiments {
2644 hedgeExperimentId
2645 experimentName
2646 userId
2647 config
2648 description
2649 experimentType
2650 lastCalculated
2651 lastModified
2652 status
2653 portfolioCalcStatus
2654 targetSecurities {
2655 gbiId
2656 security {
2657 gbiId
2658 symbol
2659 name
2660 }
2661 weight
2662 }
2663 targetPortfolios {
2664 portfolioId
2665 }
2666 baselineModel {
2667 id
2668 name
2669
2670 }
2671 baselineScenario {
2672 hedgeExperimentScenarioId
2673 scenarioName
2674 description
2675 portfolioSettingsJson
2676 hedgeExperimentPortfolios {
2677 portfolio {
2678 id
2679 name
2680 modelId
2681 performanceGridHeader
2682 performanceGrid
2683 status
2684 tearSheet {
2685 groupName
2686 members {
2687 name
2688 value
2689 }
2690 }
2691 }
2692 }
2693 status
2694 }
2695 baselineStockUniverseId
2696 }
2697 }
2698 """
2699
2700 headers = {"Authorization": "ApiKey " + self.api_key}
2701 resp = requests.post(url, json={"query": qry}, headers=headers, params=self._request_params)
2702
2703 json_resp = resp.json()
2704 # graphql endpoints typically return 200 or 400 status codes, so we must
2705 # check if we have any errors, even with a 200
2706 if (resp.ok and "errors" in json_resp) or not resp.ok:
2707 error_msg = self._try_extract_error_code(resp)
2708 logger.error(error_msg)
2709 raise BoostedAPIException(
2710 (f"Failed to get hedge experiments: {resp.status_code=}; {error_msg=}")
2711 )
2712
2713 json_experiments = resp.json()["data"]["hedgeExperiments"]
2714 experiments = [HedgeExperiment.from_json_dict(exp_json) for exp_json in json_experiments]
2715 return experiments
2716
2717 def get_hedge_experiment_details(self, experiment_id: str):
2718 url = self.base_uri + "/api/graphql"
2719 qry = """
2720 query getHedgeExperimentDetails($hedgeExperimentId: ID!) {
2721 hedgeExperiment(hedgeExperimentId: $hedgeExperimentId) {
2722 ...HedgeExperimentDetailsSummaryListFragment
2723 }
2724 }
2725
2726 fragment HedgeExperimentDetailsSummaryListFragment on HedgeExperiment {
2727 hedgeExperimentId
2728 experimentName
2729 userId
2730 config
2731 description
2732 experimentType
2733 lastCalculated
2734 lastModified
2735 status
2736 portfolioCalcStatus
2737 targetSecurities {
2738 gbiId
2739 security {
2740 gbiId
2741 symbol
2742 name
2743 }
2744 weight
2745 }
2746 selectedModels {
2747 id
2748 name
2749 stockUniverse {
2750 name
2751 }
2752 }
2753 hedgeExperimentScenarios {
2754 ...experimentScenarioFragment
2755 }
2756 selectedDummyHedgeExperimentModels {
2757 id
2758 name
2759 stockUniverse {
2760 name
2761 }
2762 }
2763 targetPortfolios {
2764 portfolioId
2765 }
2766 baselineModel {
2767 id
2768 name
2769
2770 }
2771 baselineScenario {
2772 hedgeExperimentScenarioId
2773 scenarioName
2774 description
2775 portfolioSettingsJson
2776 hedgeExperimentPortfolios {
2777 portfolio {
2778 id
2779 name
2780 modelId
2781 performanceGridHeader
2782 performanceGrid
2783 status
2784 tearSheet {
2785 groupName
2786 members {
2787 name
2788 value
2789 }
2790 }
2791 }
2792 }
2793 status
2794 }
2795 baselineStockUniverseId
2796 }
2797
2798 fragment experimentScenarioFragment on HedgeExperimentScenario {
2799 hedgeExperimentScenarioId
2800 scenarioName
2801 status
2802 description
2803 portfolioSettingsJson
2804 hedgeExperimentPortfolios {
2805 portfolio {
2806 id
2807 name
2808 modelId
2809 performanceGridHeader
2810 performanceGrid
2811 status
2812 tearSheet {
2813 groupName
2814 members {
2815 name
2816 value
2817 }
2818 }
2819 }
2820 }
2821 }
2822 """
2823 headers = {"Authorization": "ApiKey " + self.api_key}
2824 resp = requests.post(
2825 url,
2826 json={"query": qry, "variables": {"hedgeExperimentId": experiment_id}},
2827 headers=headers,
            **self._request_params,
2829 )
2830
2831 json_resp = resp.json()
2832 # graphql endpoints typically return 200 or 400 status codes, so we must
2833 # check if we have any errors, even with a 200
2834 if (resp.ok and "errors" in json_resp) or not resp.ok:
2835 error_msg = self._try_extract_error_code(resp)
2836 logger.error(error_msg)
2837 raise BoostedAPIException(
2838 (
2839 f"Failed to get hedge experiment results for {experiment_id=}: "
2840 f"{resp.status_code=}; {error_msg=}"
2841 )
2842 )
2843
2844 json_exp_results = json_resp["data"]["hedgeExperiment"]
2845 if json_exp_results is None:
2846 return None # issued a request with a non-existent experiment_id
2847 exp_results = HedgeExperimentDetails.from_json_dict(json_exp_results)
2848 return exp_results
2849
2850 def get_portfolio_performance(
2851 self,
2852 portfolio_id: str,
2853 start_date: Optional[datetime.date],
2854 end_date: Optional[datetime.date],
2855 daily_returns: bool,
2856 ) -> pd.DataFrame:
2857 """
2858 Get performance data for a portfolio.
2859
2860 Parameters
2861 ----------
2862 portfolio_id: str
2863 UUID corresponding to the portfolio in question.
2864 start_date: datetime.date
2865 Starting cutoff date to filter performance data
2866 end_date: datetime.date
2867 Ending cutoff date to filter performance data
2868 daily_returns: bool
2869 Flag indicating whether to add a new column with the daily return pct calculated
2870
2871 Returns
2872 -------
2873 pd.DataFrame object
2874 Portfolio and benchmark performance.
2875 -index:
2876 "date": pd.DatetimeIndex
2877 -columns:
2878 "benchmark": benchmark performance, % return
2879 "turnover": portfolio turnover, % of equity
2880 "portfolio": return since beginning of portfolio, % return
2881 "daily_returns": daily percent change in value of the portfolio, % return
2882 (this column is optional and depends on the daily_returns flag)
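
        Examples
        --------
        A minimal usage sketch; ``client`` is assumed to be an authenticated
        instance of this class and the portfolio ID below is a placeholder::

            import datetime

            perf_df = client.get_portfolio_performance(
                portfolio_id="PORTFOLIO_ID",
                start_date=datetime.date(2023, 1, 1),
                end_date=datetime.date(2023, 6, 30),
                daily_returns=True,
            )
            print(perf_df[["portfolio", "benchmark", "daily_returns"]].tail())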
2883 """
2884 url = f"{self.base_uri}/api/graphql"
2885 qry = """
2886 query getPortfolioPerformance($portfolioId: ID!) {
2887 portfolio(id: $portfolioId) {
2888 id
2889 modelId
2890 name
2891 status
2892 performance {
2893 benchmark
2894 date
2895 turnover
2896 value
2897 }
2898 }
2899 }
2900 """
2901
2902 headers = {"Authorization": "ApiKey " + self.api_key}
2903 resp = requests.post(
2904 url,
2905 json={"query": qry, "variables": {"portfolioId": portfolio_id}},
2906 headers=headers,
            **self._request_params,
2908 )
2909
2910 json_resp = resp.json()
2911 # the webserver returns an error for non-ready portfolios, so we have to check
2912 # for this prior to the error check below
2913 pf = json_resp["data"].get("portfolio")
2914 if pf is not None and pf["status"] != "READY":
2915 return pd.DataFrame()
2916
2917 # graphql endpoints typically return 200 or 400 status codes, so we must
2918 # check if we have any errors, even with a 200
2919 if (resp.ok and "errors" in json_resp) or not resp.ok:
2920 error_msg = self._try_extract_error_code(resp)
2921 logger.error(error_msg)
2922 raise BoostedAPIException(
2923 (
2924 f"Failed to get portfolio performance for {portfolio_id=}: "
2925 f"{resp.status_code=}; {error_msg=}"
2926 )
2927 )
2928
2929 perf = json_resp["data"]["portfolio"]["performance"]
2930 df = pd.DataFrame(perf).set_index("date").rename(columns={"value": "portfolio"})
2931 df.index = pd.to_datetime(df.index)
2932 if daily_returns:
2933 df["daily_returns"] = pd.to_numeric(df["portfolio"]).pct_change()
2934 df = df.dropna(subset=["daily_returns"])
2935 if start_date:
2936 df = df[df.index >= pd.to_datetime(start_date)]
2937 if end_date:
2938 df = df[df.index <= pd.to_datetime(end_date)]
2939 return df.astype(float)
2940
2941 def _is_portfolio_still_running(self, error_msg: str) -> bool:
2942 # this is jank af. a proper fix of this is either at the webserver
2943 # returning a better response for a portfolio in draft HT2-226, OR
2944 # a bigger refactor of the API that moves to more OOP, which would allow us
2945 # to have this data all in one place
2946 return "Could not find a model with this ID" in error_msg
2947
2948 def get_portfolio_factors(self, model_id: str, portfolio_id: str) -> pd.DataFrame:
2949 url = f"{self.base_uri}/api/analysis/factors/{model_id}/{portfolio_id}"
2950 headers = {"Authorization": "ApiKey " + self.api_key}
        resp = requests.get(url, headers=headers, **self._request_params)
2952
2953 json_resp = resp.json()
2954 if (resp.ok and "errors" in json_resp) or not resp.ok:
2955 error_msg = json_resp["errors"][0]
2956 if self._is_portfolio_still_running(error_msg):
2957 return pd.DataFrame()
2958 logger.error(error_msg)
2959 raise BoostedAPIException(
2960 (
2961 f"Failed to get portfolio factors for {portfolio_id=}: "
2962 f"{resp.status_code=}; {error_msg=}"
2963 )
2964 )
2965
2966 df = pd.DataFrame(json_resp["data"], columns=json_resp["header_row"])
2967
2968 def to_lower_snake_case(s): # why are we linting lambdas? :(
2969 return "_".join(w.lower() for w in s.split(" "))
2970
2971 df = df.rename(columns={old: to_lower_snake_case(old) for old in df.columns}).set_index(
2972 "date"
2973 )
2974 df.index = pd.to_datetime(df.index)
2975 return df
2976
2977 def get_portfolio_volatility(self, model_id: str, portfolio_id: str) -> pd.DataFrame:
2978 url = f"{self.base_uri}/api/analysis/volatility_rolling/{model_id}/{portfolio_id}"
2979 headers = {"Authorization": "ApiKey " + self.api_key}
        resp = requests.get(url, headers=headers, **self._request_params)
2981
2982 json_resp = resp.json()
2983 if (resp.ok and "errors" in json_resp) or not resp.ok:
2984 error_msg = json_resp["errors"][0]
2985 if self._is_portfolio_still_running(error_msg):
2986 return pd.DataFrame()
2987 logger.error(error_msg)
2988 raise BoostedAPIException(
2989 (
2990 f"Failed to get portfolio volatility for {portfolio_id=}: "
2991 f"{resp.status_code=}; {error_msg=}"
2992 )
2993 )
2994
2995 df = pd.DataFrame(json_resp["data"], columns=json_resp["headerRow"])
2996 df = df.rename(
2997 columns={old: old.lower().replace("avg", "avg_") for old in df.columns} # type: ignore
2998 ).set_index("date")
2999 df.index = pd.to_datetime(df.index)
3000 return df
3001
3002 def get_portfolio_holdings(self, model_id: str, portfolio_id: str) -> pd.DataFrame:
3003 url = f"{self.base_uri}/api/models/{model_id}/{portfolio_id}/basket-data"
3004 headers = {"Authorization": "ApiKey " + self.api_key}
        resp = requests.get(url, headers=headers, **self._request_params)
3006
3007 # this is a classic abuse of try/except as control flow: we try to get json body
3008 # from the response so that we can error-check. if this fails, we assume we have
3009 # a legit text response (corresponding to the csv data we care about)
3010 try:
3011 json_resp = resp.json()
3012 except json.decoder.JSONDecodeError:
3013 df = pd.read_csv(io.StringIO(resp.text), header=[0])
3014 else:
3015 error_msg = json_resp["errors"][0]
3016 if self._is_portfolio_still_running(error_msg):
3017 return pd.DataFrame()
3018 else:
3019 logger.error(error_msg)
3020 raise BoostedAPIException(
3021 (
3022 f"Failed to get portfolio holdings for {portfolio_id=}: "
3023 f"{resp.status_code=}; {error_msg=}"
3024 )
3025 )
3026
3027 df = df.rename(columns={old: old.lower() for old in df.columns}).set_index("date")
3028 df.index = pd.to_datetime(df.index)
3029 return df
3030
3031 def getStockDataTableForDate(
3032 self, model_id: str, portfolio_id: str, date: datetime.date
3033 ) -> pd.DataFrame:
3034 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
3035
3036 url_base = f"{self.base_uri}/api/analysis"
3037 url_params = f"{model_id}/{portfolio_id}"
3038 formatted_date = date.strftime("%Y-%m-%d")
3039
3040 stock_prices_url = f"{url_base}/stock-prices/{url_params}/{formatted_date}"
3041 stock_factors_url = f"{url_base}/stock-factors/{url_params}/date/{formatted_date}"
3042
3043 prices_params = {"useTicker": "false", "useCurrentSignals": "true"}
3044 factors_param = {"useTicker": "false", "useCurrentSignals": "true"}
3045
3046 prices_resp = requests.get(
3047 stock_prices_url, headers=headers, params=prices_params, **self._request_params
3048 )
3049 factors_resp = requests.get(
3050 stock_factors_url, headers=headers, params=factors_param, **self._request_params
3051 )
3052
3053 frames = []
3054 gbi_ids = set()
3055 for res in (prices_resp, factors_resp):
3056 if not res.ok:
3057 error_msg = self._try_extract_error_code(res)
3058 logger.error(error_msg)
3059 raise BoostedAPIException(
3060 (
3061 f"Failed to fetch stock data table for model {model_id}"
3062 f" (it's possible no data is present for the given date: {date})."
3063 f" Error message: {error_msg}"
3064 )
3065 )
3066 result = res.json()
3067 df = pd.DataFrame(result)
3068 gbi_ids.update(df.columns.to_list())
3069 frames.append(pd.DataFrame(result))
3070
3071 all_gbiid_df = pd.concat(frames)
3072
3073 # Get the metadata of all GBI IDs
3074 gbiid_metadata_res = self._get_graphql(
3075 query=graphql_queries.GET_SEC_INFO_QRY, variables={"ids": [int(x) for x in gbi_ids]}
3076 )
3077 # Build a DF of metadata x GBI IDs
3078 gbiid_metadata_df = pd.DataFrame(
3079 {str(x["gbiId"]): x for x in gbiid_metadata_res["data"]["securities"]}
3080 )
        # Slice the metadata we care about; "symbol" is dropped at the end.
3082 isin_country_currency_df = gbiid_metadata_df.loc[["isin", "country", "currency", "symbol"]]
3083 # Concatenate metadata to the existing stock data DF
3084 all_gbiid_with_metadata_df = pd.concat([all_gbiid_df, isin_country_currency_df])
3085 gbiid_with_symbol_df = all_gbiid_with_metadata_df.loc[
3086 :, all_gbiid_with_metadata_df.loc["symbol"].notna()
3087 ]
3088 renamed_df = gbiid_with_symbol_df.rename(
3089 index={"isin": "ISIN"}, columns=gbiid_with_symbol_df.loc["symbol"].to_dict()
3090 )
3091 output_df = renamed_df.drop(index=["symbol"])
3092 return output_df
3093
3094 def add_hedge_experiment_scenario(
3095 self,
3096 experiment_id: str,
3097 scenario_name: str,
3098 scenario_settings: PortfolioSettings,
3099 run_scenario_immediately: bool,
3100 ) -> HedgeExperimentScenario:
3101 add_scenario_input = {
3102 "hedgeExperimentId": experiment_id,
3103 "scenarioName": scenario_name,
3104 "portfolioSettingsJson": str(scenario_settings),
3105 "runExperimentOnScenario": run_scenario_immediately,
3106 "createDefaultPortfolio": "false",
3107 }
3108 qry = """
3109 mutation addHedgeExperimentScenario(
3110 $input: AddHedgeExperimentScenarioInput!
3111 ) {
3112 addHedgeExperimentScenario(input: $input) {
3113 hedgeExperimentScenario {
3114 hedgeExperimentScenarioId
3115 scenarioName
3116 description
3117 portfolioSettingsJson
3118 }
3119 }
3120 }
3121
3122 """
3123
3124 url = f"{self.base_uri}/api/graphql"
3125
3126 resp = requests.post(
3127 url,
3128 headers={"Authorization": "ApiKey " + self.api_key},
3129 json={"query": qry, "variables": {"input": add_scenario_input}},
3130 )
3131
3132 json_resp = resp.json()
3133 if (resp.ok and "errors" in json_resp) or not resp.ok:
3134 error_msg = self._try_extract_error_code(resp)
3135 logger.error(error_msg)
3136 raise BoostedAPIException(
3137 (f"Failed to add scenario: {resp.status_code=}; {error_msg=}")
3138 )
3139
3140 scenario_dict = json_resp["data"]["addHedgeExperimentScenario"]["hedgeExperimentScenario"]
3141 if scenario_dict is None:
3142 raise BoostedAPIException(
3143 "Failed to add scenario, likely due to bad experiment id or api key"
3144 )
3145 s = HedgeExperimentScenario.from_json_dict(scenario_dict)
3146 return s
3147
3148 # experiment life cycle has 4 steps:
3149 # 1. creation - essentially a very simple registration of a new instance, returning an id
3150 # 2. modify - populate with settings
3151 # 3. start - run the experiment
3152 # 4. delete - drop the experiment
    # while I would prefer to just have 2 funcs for (1,2,3) and (4) for a simpler API,
    # we need to expose finer-grained control because of how scenarios work.
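    # A hedged usage sketch of steps 1 and 2 using the methods defined below;
    # ``client`` is assumed to be an authenticated instance of this class, the
    # names/IDs and the "HEDGE" experiment-type value are placeholders/assumptions,
    # and the start and delete steps are omitted here:
    #
    #     experiment = client.create_hedge_experiment(
    #         name="My experiment",
    #         description="Draft created via the API",
    #         experiment_type="HEDGE",  # assumed value of hedge_experiment_type
    #         target_securities=None,
    #     )
    #     client.modify_hedge_experiment(
    #         experiment_id="EXPERIMENT_ID",  # id of the draft created above
    #         baseline_portfolio_settings="{}",  # placeholder settings JSON
    #     )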
3155 def create_hedge_experiment(
3156 self,
3157 name: str,
3158 description: str,
3159 experiment_type: hedge_experiment_type,
3160 target_securities: Union[Dict[GbiIdSecurity, float], str, None],
3161 ) -> HedgeExperiment:
        # we don't pass target_securities here (as much as I'd like to) because the
        # graphql input doesn't support it at this point

        # note that this query returns a lot of null fields at this point, but
        # they are necessary for building a HedgeExperiment.
3167 create_qry = """
3168 mutation createDraftMutation($input: CreateHedgeExperimentDraftInput!) {
3169 createHedgeExperimentDraft(input: $input) {
3170 hedgeExperiment {
3171 hedgeExperimentId
3172 experimentName
3173 userId
3174 config
3175 description
3176 experimentType
3177 lastCalculated
3178 lastModified
3179 status
3180 portfolioCalcStatus
3181 targetSecurities {
3182 gbiId
3183 security {
3184 gbiId
3185 name
3186 symbol
3187 }
3188 weight
3189 }
3190 baselineModel {
3191 id
3192 name
3193 }
3194 baselineScenario {
3195 hedgeExperimentScenarioId
3196 scenarioName
3197 description
3198 portfolioSettingsJson
3199 hedgeExperimentPortfolios {
3200 portfolio {
3201 id
3202 name
3203 modelId
3204 performanceGridHeader
3205 performanceGrid
3206 status
3207 tearSheet {
3208 groupName
3209 members {
3210 name
3211 value
3212 }
3213 }
3214 }
3215 }
3216 status
3217 }
3218 baselineStockUniverseId
3219 }
3220 }
3221 }
3222 """
3223
3224 create_input: Dict[str, Any] = {
3225 "name": name,
3226 "experimentType": experiment_type,
3227 "description": description,
3228 }
3229 if isinstance(target_securities, dict):
3230 create_input["setTargetSecurities"] = [
3231 {"gbiId": sec.gbi_id, "weight": weight}
3232 for (sec, weight) in target_securities.items()
3233 ]
3234 elif isinstance(target_securities, str):
3235 create_input["setTargetPortfolios"] = [{"portfolioId": target_securities}]
3236 elif target_securities is None:
3237 pass
3238 else:
3239 raise TypeError(
3240 "Expected value of type Union[Dict[GbiIdSecurity, str], str] for "
3241 f"argument 'target_securities'; got {type(target_securities)}"
3242 )
3243 resp = requests.post(
3244 f"{self.base_uri}/api/graphql",
3245 json={"query": create_qry, "variables": {"input": create_input}},
3246 headers={"Authorization": "ApiKey " + self.api_key},
3247 params=self._request_params,
3248 )
3249
3250 json_resp = resp.json()
3251 if (resp.ok and "errors" in json_resp) or not resp.ok:
3252 error_msg = self._try_extract_error_code(resp)
3253 logger.error(error_msg)
3254 raise BoostedAPIException(
3255 (f"Failed to create hedge experiment: {resp.status_code=}; {error_msg=}")
3256 )
3257
3258 exp_dict = json_resp["data"]["createHedgeExperimentDraft"]["hedgeExperiment"]
3259 experiment = HedgeExperiment.from_json_dict(exp_dict)
3260 return experiment
3261
3262 def modify_hedge_experiment(
3263 self,
3264 experiment_id: str,
3265 name: Optional[str] = None,
3266 description: Optional[str] = None,
3267 experiment_type: Optional[hedge_experiment_type] = None,
3268 target_securities: Union[Dict[GbiIdSecurity, float], str, None] = None,
3269 model_ids: Optional[List[str]] = None,
3270 stock_universe_ids: Optional[List[str]] = None,
3271 create_default_scenario: bool = True,
3272 baseline_model_id: Optional[str] = None,
3273 baseline_stock_universe_id: Optional[str] = None,
3274 baseline_portfolio_settings: Optional[str] = None,
3275 ) -> HedgeExperiment:
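        """
        Update a hedge experiment draft. Only arguments that are not None are
        applied. target_securities behaves as in create_hedge_experiment,
        except that setting target securities clears any target portfolio and
        vice versa, since the two are mutually exclusive. Returns the updated
        HedgeExperiment.
        """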
3276 mod_qry = """
3277 mutation modifyHedgeExperimentDraft(
3278 $input: ModifyHedgeExperimentDraftInput!
3279 ) {
3280 modifyHedgeExperimentDraft(input: $input) {
3281 hedgeExperiment {
3282 ...HedgeExperimentSelectedSecuritiesPageFragment
3283 }
3284 }
3285 }
3286
3287 fragment HedgeExperimentSelectedSecuritiesPageFragment on HedgeExperiment {
3288 hedgeExperimentId
3289 experimentName
3290 userId
3291 config
3292 description
3293 experimentType
3294 lastCalculated
3295 lastModified
3296 status
3297 portfolioCalcStatus
3298 targetSecurities {
3299 gbiId
3300 security {
3301 gbiId
3302 name
3303 symbol
3304 }
3305 weight
3306 }
3307 targetPortfolios {
3308 portfolioId
3309 }
3310 baselineModel {
3311 id
3312 name
3313 }
3314 baselineScenario {
3315 hedgeExperimentScenarioId
3316 scenarioName
3317 description
3318 portfolioSettingsJson
3319 hedgeExperimentPortfolios {
3320 portfolio {
3321 id
3322 name
3323 modelId
3324 performanceGridHeader
3325 performanceGrid
3326 status
3327 tearSheet {
3328 groupName
3329 members {
3330 name
3331 value
3332 }
3333 }
3334 }
3335 }
3336 status
3337 }
3338 baselineStockUniverseId
3339 }
3340 """
3341 mod_input = {
3342 "hedgeExperimentId": experiment_id,
3343 "createDefaultScenario": create_default_scenario,
3344 }
3345 if name is not None:
3346 mod_input["newExperimentName"] = name
3347 if description is not None:
3348 mod_input["newExperimentDescription"] = description
3349 if experiment_type is not None:
3350 mod_input["newExperimentType"] = experiment_type
3351 if model_ids is not None:
3352 mod_input["setSelectdModels"] = model_ids
3353 if stock_universe_ids is not None:
3354 mod_input["selectedStockUniverseIds"] = stock_universe_ids
3355 if baseline_model_id is not None:
3356 mod_input["setBaselineModel"] = baseline_model_id
3357 if baseline_stock_universe_id is not None:
3358 mod_input["setBaselineStockUniverse"] = baseline_stock_universe_id
3359 if baseline_portfolio_settings is not None:
3360 mod_input["setBaselinePortfolioSettings"] = baseline_portfolio_settings
3361        # note that the behaviors bound to these data are mutually exclusive,
3362        # and it's possible the opposite was set earlier in the DRAFT phase
3363        # of experiment creation, so when setting one, we must unset the other
3364 if isinstance(target_securities, dict):
3365 mod_input["setTargetSecurities"] = [
3366 {"gbiId": sec.gbi_id, "weight": weight}
3367 for (sec, weight) in target_securities.items()
3368 ]
3369 mod_input["setTargetPortfolios"] = None
3370 elif isinstance(target_securities, str):
3371 mod_input["setTargetPortfolios"] = [{"portfolioId": target_securities}]
3372 mod_input["setTargetSecurities"] = None
3373 elif target_securities is None:
3374 pass
3375 else:
3376 raise TypeError(
3377                "Expected value of type Union[Dict[GbiIdSecurity, float], str] "
3378 f"for argument 'target_securities'; got {type(target_securities)}"
3379 )
3380
3381 resp = requests.post(
3382 f"{self.base_uri}/api/graphql",
3383 json={"query": mod_qry, "variables": {"input": mod_input}},
3384 headers={"Authorization": "ApiKey " + self.api_key},
3385 params=self._request_params,
3386 )
3387
3388 json_resp = resp.json()
3389 if (resp.ok and "errors" in json_resp) or not resp.ok:
3390 error_msg = self._try_extract_error_code(resp)
3391 logger.error(error_msg)
3392 raise BoostedAPIException(
3393 (
3394 f"Failed to modify hedge experiment in preparation for start {experiment_id=}: "
3395 f"{resp.status_code=}; {error_msg=}"
3396 )
3397 )
3398
3399 exp_dict = json_resp["data"]["modifyHedgeExperimentDraft"]["hedgeExperiment"]
3400 experiment = HedgeExperiment.from_json_dict(exp_dict)
3401 return experiment
3402
3403 def start_hedge_experiment(self, experiment_id: str, *scenario_ids: str) -> HedgeExperiment:
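        """
        Start a hedge experiment. Any scenario ids provided are included in
        the start request; otherwise the experiment is started as currently
        configured. Returns the updated HedgeExperiment.
        """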
3404 start_qry = """
3405 mutation startHedgeExperiment($input: StartHedgeExperimentInput!) {
3406 startHedgeExperiment(input: $input) {
3407 hedgeExperiment {
3408 hedgeExperimentId
3409 experimentName
3410 userId
3411 config
3412 description
3413 experimentType
3414 lastCalculated
3415 lastModified
3416 status
3417 portfolioCalcStatus
3418 targetSecurities {
3419 gbiId
3420 security {
3421 gbiId
3422 name
3423 symbol
3424 }
3425 weight
3426 }
3427 targetPortfolios {
3428 portfolioId
3429 }
3430 baselineModel {
3431 id
3432 name
3433 }
3434 baselineScenario {
3435 hedgeExperimentScenarioId
3436 scenarioName
3437 description
3438 portfolioSettingsJson
3439 hedgeExperimentPortfolios {
3440 portfolio {
3441 id
3442 name
3443 modelId
3444 performanceGridHeader
3445 performanceGrid
3446 status
3447 tearSheet {
3448 groupName
3449 members {
3450 name
3451 value
3452 }
3453 }
3454 }
3455 }
3456 status
3457 }
3458 baselineStockUniverseId
3459 }
3460 }
3461 }
3462 """
3463 start_input: Dict[str, Any] = {"hedgeExperimentId": experiment_id}
3464 if len(scenario_ids) > 0:
3465 start_input["hedgeExperimentScenarioIds"] = list(scenario_ids)
3466
3467 resp = requests.post(
3468 f"{self.base_uri}/api/graphql",
3469 json={"query": start_qry, "variables": {"input": start_input}},
3470 headers={"Authorization": "ApiKey " + self.api_key},
3471 params=self._request_params,
3472 )
3473
3474 json_resp = resp.json()
3475 if (resp.ok and "errors" in json_resp) or not resp.ok:
3476 error_msg = self._try_extract_error_code(resp)
3477 logger.error(error_msg)
3478 raise BoostedAPIException(
3479 (
3480 f"Failed to start hedge experiment {experiment_id=}: "
3481 f"{resp.status_code=}; {error_msg=}"
3482 )
3483 )
3484
3485 exp_dict = json_resp["data"]["startHedgeExperiment"]["hedgeExperiment"]
3486 experiment = HedgeExperiment.from_json_dict(exp_dict)
3487 return experiment
3488
3489 def delete_hedge_experiment(self, experiment_id: str) -> bool:
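        """
        Delete a hedge experiment by id. Returns the success flag reported by
        the API.
        """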
3490 delete_qry = """
3491 mutation($input: DeleteHedgeExperimentsInput!) {
3492 deleteHedgeExperiments(input: $input) {
3493 success
3494 }
3495 }
3496 """
3497 delete_input = {"hedgeExperimentIds": [experiment_id]}
3498 resp = requests.post(
3499 f"{self.base_uri}/api/graphql",
3500 json={"query": delete_qry, "variables": {"input": delete_input}},
3501 headers={"Authorization": "ApiKey " + self.api_key},
3502 params=self._request_params,
3503 )
3504
3505 json_resp = resp.json()
3506 if (resp.ok and "errors" in json_resp) or not resp.ok:
3507 error_msg = self._try_extract_error_code(resp)
3508 logger.error(error_msg)
3509 raise BoostedAPIException(
3510 (
3511                    f"Failed to delete hedge experiment {experiment_id=}: "
3512                    f"{resp.status_code=}; {error_msg=}"
3513 )
3514 )
3515
3516 return json_resp["data"]["deleteHedgeExperiments"]["success"]
3517
3518 def create_hedge_basket_position_bounds_from_csv(
3519 self,
3520 filepath: str,
3521 name: str,
3522 description: Optional[str],
3523 mapping_result_filepath: Optional[str],
3524 ) -> str:
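        """
        Create a position-size-bounds settings template
        ("basketTrading.positionSizeBounds") from a csv file and return the
        string result of the createPartialStrategyTemplate mutation.

        The csv must contain the columns "ISIN", "Lower Bound" and
        "Upper Bound" (bounds are fractions in [0, 1] with lower <= upper) and
        may also contain "Date", "Country" and "Currency" to help map each
        ISIN to a security. A sample row (values are illustrative only):

            Date,ISIN,Country,Currency,Lower Bound,Upper Bound
            2023-01-02,US0378331005,USA,USD,0.0,0.05

        Parameters
        ----------
        filepath: str
            Path of the csv file to read.
        name: str
            Name of the settings template to create.
        description: Optional[str]
            Description of the settings template.
        mapping_result_filepath: Optional[str]
            If provided, a csv showing how each row was mapped to a security
            is written to this path for reference.
        """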
3525 DATE = "Date"
3526 ISIN = "ISIN"
3527 COUNTRY = "Country"
3528 CURRENCY = "Currency"
3529 LOWER_BOUND = "Lower Bound"
3530 UPPER_BOUND = "Upper Bound"
3531 supported_columns = {
3532 DATE,
3533 ISIN,
3534 COUNTRY,
3535 CURRENCY,
3536 LOWER_BOUND,
3537 UPPER_BOUND,
3538 }
3539 required_columns = {ISIN, LOWER_BOUND, UPPER_BOUND}
3540
3541 try:
3542 df: pd.DataFrame = pd.read_csv(filepath, parse_dates=True)
3543 except Exception as e:
3544 raise BoostedAPIException(f"Error reading {filepath=}: {e}")
3545
3546 columns = set(df.columns)
3547
3548 # First perform basic data validation
3549 missing_required_columns = required_columns - columns
3550 if missing_required_columns:
3551 raise BoostedAPIException(
3552 f"The following required columns are missing: {missing_required_columns}"
3553 )
3554 extra_columns = columns - supported_columns
3555 if extra_columns:
3556 logger.warning(
3557 f"The following columns are unsupported and will be ignored: {extra_columns}"
3558 )
3559 try:
3560 df[LOWER_BOUND] = df[LOWER_BOUND].astype(float)
3561 df[UPPER_BOUND] = df[UPPER_BOUND].astype(float)
3562 df[ISIN] = df[ISIN].astype(str)
3563 except Exception as e:
3564 raise BoostedAPIException(f"Column datatypes are incorrect: {e}")
3565 lb_gt_ub = df[df[LOWER_BOUND] > df[UPPER_BOUND]]
3566 if not lb_gt_ub.empty:
3567 raise BoostedAPIException(
3568 f"Lower Bound must be <= Upper Bound, but these are not: {lb_gt_ub[ISIN].tolist()}"
3569 )
3570 out_of_range = df[
3571 (
3572 (df[LOWER_BOUND] < 0)
3573 | (df[LOWER_BOUND] > 1)
3574 | (df[UPPER_BOUND] < 0)
3575 | (df[UPPER_BOUND] > 1)
3576 )
3577 ]
3578 if not out_of_range.empty:
3579 raise BoostedAPIException("Lower Bound and Upper Bound values must be in range [0, 1]")
3580
3581 # Now map the security info into GBI IDs
3582 rows = list(df.to_dict(orient="index").values())
3583 sec_data_list = self.getGbiIdFromIdentCountryCurrencyDate(
3584 ident_country_currency_dates=[
3585 DateIdentCountryCurrency(
3586 date=row.get(DATE, datetime.date.today().isoformat()),
3587 identifier=row.get(ISIN),
3588 id_type=ColumnSubRole.ISIN,
3589 country=row.get(COUNTRY),
3590 currency=row.get(CURRENCY),
3591 )
3592 for row in rows
3593 ]
3594 )
3595
3596 # Now take each row and its gbi id mapping, and create the bounds list
3597 bounds = []
3598 for row, sec_data in zip(rows, sec_data_list):
3599 if sec_data is None:
3600 logger.warning(f"Failed to map {row[ISIN]}, skipping this security.")
3601 else:
3602 bounds.append(
3603 {"gbi_id": str(sec_data.gbi_id), "lb": row[LOWER_BOUND], "ub": row[UPPER_BOUND]}
3604 )
3605
3606 # Add security metadata to see the mapping
3607 row["Mapped GBI ID"] = sec_data.gbi_id
3608 row[f"Mapped {ISIN}"] = sec_data.isin_info.identifier
3609 row[f"Mapped {COUNTRY}"] = sec_data.isin_info.country
3610 row[f"Mapped {CURRENCY}"] = sec_data.isin_info.currency
3611 row["Mapped Ticker"] = sec_data.ticker
3612 row["Mapped Company Name"] = sec_data.company_name
3613
3614 # Call endpoint to create the bounds settings template
3615 qry = """
3616 mutation CreatePartialStrategyTemplate(
3617 $portfolioSettingsKey: String!
3618 $partialSettings: String!
3619 $name: String!
3620 $description: String
3621 ) {
3622 createPartialStrategyTemplate(
3623 portfolioSettingsKey: $portfolioSettingsKey
3624 partialSettings: $partialSettings
3625 name: $name
3626 description: $description
3627 )
3628 }
3629 """
3630 variables = {
3631 "portfolioSettingsKey": "basketTrading.positionSizeBounds",
3632 "partialSettings": json.dumps(bounds),
3633 "name": name,
3634 "description": description,
3635 }
3636 resp = self._get_graphql(qry, variables=variables)
3637
3638 # Write mapped csv for reference
3639 if mapping_result_filepath is not None:
3640 pd.DataFrame(rows).to_csv(mapping_result_filepath)
3641
3642 return resp["data"]["createPartialStrategyTemplate"]
3643
3644 def get_hit_rate_file(self, model_id: str, portfolio_id: str, file_key: str) -> dict:
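        """
        Fetch a hit rate file for the given model and portfolio, identified by
        file_key, and return the parsed json response as a dict.
        """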
3645 url = f"{self.base_uri}{ROUTE_PREFIX}{DAL_PA_ROUTE}/get-hit-rate-file/"
3646 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
3647 req_json = {"model_id": model_id, "portfolio_id": portfolio_id, "file_key": file_key}
3648 res = requests.post(url, json=req_json, headers=headers, **self._request_params)
3649 if not res.ok:
3650 error_msg = self._try_extract_error_code(res)
3651 logger.error(error_msg)
3652 raise BoostedAPIException(f"Failed to get Hit Rate file: {error_msg}")
3653
3654 data = res.json()
3655 return data
3656
3657 def get_hit_rate_with_securities(
3658 self,
3659 model_id: str,
3660 portfolio_id: str,
3661 meet_all_conditions: bool,
3662 securities: List[str],
3663 countries: List[str],
3664 sectors: List[str],
3665 start_date: Optional[BoostedDate],
3666 end_date: Optional[BoostedDate],
3667 ) -> dict:
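        """
        Get hit rate statistics for the given model and portfolio, filtered by
        the provided securities, countries and sectors (meet_all_conditions
        controls whether all filters must be satisfied or any of them, as
        interpreted by the endpoint). If start_date / end_date are omitted, a
        default range is chosen by get_date_range. Returns the parsed json
        response as a dict.
        """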
3668
3669 start_date, end_date = get_date_range(start_date=start_date, end_date=end_date)
3670 start_date, end_date = start_date.isoformat(), end_date.isoformat()
3671
3672        url = f"{self.base_uri}{WATCHLIST_ROUTE_PREFIX}{DAL_PA_ROUTE}/get-hit-rate/"  # noqa
3673 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
3674 req_json = {
3675 "model_id": model_id,
3676 "portfolio_id": portfolio_id,
3677 "meet_all_conditions": meet_all_conditions,
3678 "securities": securities,
3679 "countries": countries,
3680 "sectors": sectors,
3681 "start_date": start_date,
3682 "end_date": end_date,
3683 }
3684 res = requests.post(url, json=req_json, headers=headers, **self._request_params)
3685
3686 if not res.ok:
3687 error_msg = self._try_extract_error_code(res)
3688 logger.error(error_msg)
3689 raise BoostedAPIException(f"Failed to get Hit Rate with securities: {error_msg}")
3690
3691 data = res.json()
3692 return data
3693
3694 def get_portfolio_accuracy(
3695 self,
3696 model_id: str,
3697 portfolio_id: str,
3698 start_date: Optional[BoostedDate] = None,
3699 end_date: Optional[BoostedDate] = None,
3700 ) -> dict:
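        """
        Get hit rate (accuracy) statistics for the given model and portfolio,
        optionally restricted to the [start_date, end_date] range, and return
        the parsed json response as a dict.
        """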
3701 if start_date and end_date:
3702 validate_start_and_end_dates(start_date=start_date, end_date=end_date)
3703 start_date = convert_date(start_date)
3704 end_date = convert_date(end_date)
3705
3706 # TODO: Later change this URI to not use the watchlist prefix. It is misnamed.
3707 url = f"{self.base_uri}{WATCHLIST_ROUTE_PREFIX}{DAL_PA_ROUTE}/get-hit-rate/"
3708 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
3709 req_json = {"model_id": model_id, "portfolio_id": portfolio_id}
3710 if start_date and end_date:
3711 req_json["start_date"] = start_date.isoformat()
3712 req_json["end_date"] = end_date.isoformat()
3713 res = requests.post(url, json=req_json, headers=headers, **self._request_params)
3714
3715 if not res.ok:
3716 error_msg = self._try_extract_error_code(res)
3717 logger.error(error_msg)
3718 raise BoostedAPIException(f"Failed to get Hit Rate: {error_msg}")
3719
3720 data = res.json()
3721 return data
3722
3723 def create_watchlist(self, name: str) -> str:
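        """
        Create a new watchlist with the given name and return its watchlist id.
        """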
3724 url = f"{self.base_uri}{WATCHLIST_ROUTE_PREFIX}{DAL_WATCHLIST_ROUTE}/create/"
3725 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
3726 req_json = {"name": name}
3727 res = requests.post(url, json=req_json, headers=headers, **self._request_params)
3728
3729 if not res.ok:
3730 error_msg = self._try_extract_error_code(res)
3731 logger.error(error_msg)
3732            raise BoostedAPIException(f"Failed to create watchlist: {error_msg}")
3733
3734 data = res.json()
3735 return data["watchlist_id"]
3736
3737 def _get_graphql(
3738 self,
3739 query: str,
3740 variables: Dict,
3741 error_msg_prefix: str = "Failed to get graphql result: ",
3742 log_error: bool = True,
3743 ) -> Dict:
3744 headers = {"Authorization": "ApiKey " + self.api_key}
3745 json_req = {"query": query, "variables": variables}
3746
3747 url = self.base_uri + "/api/graphql"
3748 resp = requests.post(
3749 url,
3750 json=json_req,
3751 headers=headers,
3752 params=self._request_params,
3753 )
3754
3755 # graphql endpoints typically return 200 or 400 status codes, so we must
3756 # check if we have any errors, even with a 200
3757 if not resp.ok or (resp.ok and "errors" in resp.json()):
3758 error_msg = self._try_extract_error_code(resp)
3759 error_str = str(error_msg_prefix) + f" {resp.status_code=}; {error_msg=}"
3760 if log_error:
3761 logger.error(error_str)
3762 raise BoostedAPIException(error_str)
3763
3764 json_resp = resp.json()
3765 return json_resp
3766
3767 def _get_security_info(self, gbi_ids: List[int]) -> Dict:
3768 query = graphql_queries.GET_SEC_INFO_QRY
3769 variables = {
3770 "ids": [] if not gbi_ids else gbi_ids,
3771 }
3772
3773 error_msg_prefix = "Failed to get Security Details:"
3774 return self._get_graphql(
3775 query=query, variables=variables, error_msg_prefix=error_msg_prefix
3776 )
3777
3778 def _get_sector_info(self) -> Dict:
3779 """
3780 Returns a list of sector objects, e.g.
3781 {
3782 "id": 1010,
3783 "parentId": 10,
3784 "name": "Energy",
3785 "topParentName": null,
3786 "spiqSectorId": -1,
3787 "legacy": false
3788 }
3789 """
3790 url = f"{self.base_uri}/api/sectors"
3791 headers = {"Authorization": "ApiKey " + self.api_key}
3792 res = requests.get(url, headers=headers, **self._request_params)
3793 self._check_ok_or_err_with_msg(res, "Failed to get sectors data")
3794 return res.json()["sectors"]
3795
3796 def _get_watchlist_analysis(
3797 self,
3798 gbi_ids: List[int],
3799 model_ids: List[str],
3800 portfolio_ids: List[str],
3801        asof_date: Optional[datetime.date] = None,
3802 ) -> Dict:
3803 query = graphql_queries.WATCHLIST_ANALYSIS_QRY
3804 variables = {
3805 "gbiIds": gbi_ids,
3806 "modelIds": model_ids,
3807 "portfolioIds": portfolio_ids,
3808            "date": self.__iso_format(asof_date or datetime.date.today()),
3809 }
3810 error_msg_prefix = "Failed to get Coverage Analysis:"
3811 return self._get_graphql(
3812 query=query, variables=variables, error_msg_prefix=error_msg_prefix
3813 )
3814
3815 def _get_models_for_portfolio(self, portfolio_ids: List[str]) -> Dict:
3816 query = graphql_queries.GET_MODELS_FOR_PORTFOLIOS_QRY
3817 variables = {"ids": portfolio_ids}
3818 error_msg_prefix = "Failed to get Models for Portfolios: "
3819 return self._get_graphql(
3820 query=query, variables=variables, error_msg_prefix=error_msg_prefix
3821 )
3822
3823 def _get_excess_return(
3824        self, model_ids: List[str], gbi_ids: List[int], asof_date: Optional[datetime.date] = None
3825 ) -> Dict:
3826 query = graphql_queries.GET_EXCESS_RETURN_QRY
3827
3828 variables = {
3829 "modelIds": model_ids,
3830 "gbiIds": gbi_ids,
3831            "date": self.__iso_format(asof_date or datetime.date.today()),
3832 }
3833 error_msg_prefix = "Failed to get Excess Return Slugging Pct: "
3834 return self._get_graphql(
3835 query=query, variables=variables, error_msg_prefix=error_msg_prefix
3836 )
3837
3838 def _coverage_column_name_format(self, in_str) -> str:
3839 if in_str.upper() == "ISIN":
3840 return "ISIN"
3841
3842 return in_str.title()
3843
3844 def _get_model_stocks(self, model_id: str) -> List[GbiIdTickerISIN]:
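        """
        Return the securities in the model's stock universe as GbiIdTickerISIN
        objects, looked up via the model's stockUniverseId.
        """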
3845 # first, get the universe id
3846 resp = self._get_graphql(
3847 graphql_queries.GET_MODEL_STOCK_UNIVERSE_ID_QUERY,
3848 variables={"modelId": model_id},
3849 error_msg_prefix="Failed to get model stock universe ID",
3850 )
3851 universe_id = resp["data"]["model"]["stockUniverseId"]
3852
3853 # now, query for universe stocks
3854 url = self.base_uri + f"/api/stocks/model-universe/{universe_id}"
3855 headers = {"Authorization": "ApiKey " + self.api_key}
3856 universe_resp = requests.get(url, headers=headers, **self._request_params)
3857 universe = universe_resp.json()["stockUniverse"]
3858 securities = [
3859 GbiIdTickerISIN(gbi_id=security["id"], ticker=security["symbol"], isin=security["isin"])
3860 for security in universe
3861 ]
3862 return securities
3863
3864 def get_coverage_info(self, watchlist_id: str, portfolio_group_id: str) -> pd.DataFrame:
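        """
        Build a coverage DataFrame for the securities in a watchlist against
        the portfolios in a portfolio group: one row per security with its
        identifying info, composite rating and rating delta, and per-model
        rank, rating, slugging % and positive / negative recommendation
        columns.
        """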
3865 # get securities list in watchlist
3866 watchlist_details = self.get_watchlist_details(watchlist_id)
3867 security_list = watchlist_details["targets"]
3868
3869 gbi_ids = [x["gbi_id"] for x in security_list]
3870
3871 gbi_data: Dict[Any, Dict] = {x: {} for x in gbi_ids}
3872
3873 # get security info ticker, name, industry etc
3874 sec_info = self._get_security_info(gbi_ids)
3875
3876 for sec in sec_info["data"]["securities"]:
3877 gbi_id = sec["gbiId"]
3878 for k in ["symbol", "name", "isin", "country", "currency"]:
3879 gbi_data[gbi_id][self._coverage_column_name_format(k)] = sec[k]
3880
3881 gbi_data[gbi_id][self._coverage_column_name_format("Sector")] = sec["sector"][
3882 "topParentName"
3883 ]
3884
3885 # get portfolios list in portfolio_Group
3886 portfolio_group = self.get_portfolio_group(portfolio_group_id)
3887 portfolio_ids = [x["portfolio_id"] for x in portfolio_group["portfolios"]]
3888 portfolio_info = {x["portfolio_id"]: x for x in portfolio_group["portfolios"]}
3889
3890 model_resp = self._get_models_for_portfolio(portfolio_ids=portfolio_ids)
3891 for portfolio in model_resp["data"]["portfolios"]:
3892 portfolio_info[portfolio["id"]].update(portfolio)
3893
3894 model_info = {
3895 x["modelId"]: portfolio_info[x["id"]] for x in model_resp["data"]["portfolios"]
3896 }
3897
3898 # model_ids and portfolio_ids are parallel arrays
3899 model_ids = [portfolio_info[x]["modelId"] for x in portfolio_ids]
3900
3901 # graphql: get watchlist analysis
3902 wl_analysis = self._get_watchlist_analysis(
3903 gbi_ids=gbi_ids,
3904 model_ids=model_ids,
3905 portfolio_ids=portfolio_ids,
3906 asof_date=datetime.date.today(),
3907 )
3908
3909 portfolio_gbi_data: Dict[Any, Dict] = {k: {} for k in portfolio_ids}
3910 for pi, v in portfolio_gbi_data.items():
3911 v.update({k: {} for k in gbi_data.keys()})
3912
3913 equity_explorer_date = wl_analysis["data"]["watchlistAnalysis"][0]["analysisDates"][0][
3914 "date"
3915 ]
3916 for wla in wl_analysis["data"]["watchlistAnalysis"]:
3917 gbi_id = wla["gbiId"]
3918 gbi_data[gbi_id]["Composite Rating"] = wla["analysisDates"][0]["aggregateSignal"][
3919 "rating"
3920 ]
3921 gbi_data[gbi_id]["Composite Rating Delta"] = wla["analysisDates"][0]["aggregateSignal"][
3922 "ratingDelta"
3923 ]
3924
3925 for p in wla["analysisDates"][0]["portfoliosSignals"]:
3926 model_name = portfolio_info[p["portfolioId"]]["modelName"]
3927
3928 portfolio_gbi_data[p["portfolioId"]][gbi_id][
3929 model_name + self._coverage_column_name_format(": rank")
3930 ] = (p["rank"] + 1)
3931 portfolio_gbi_data[p["portfolioId"]][gbi_id][
3932 model_name + self._coverage_column_name_format(": rank delta")
3933 ] = (-1 * p["signalDelta"])
3934 portfolio_gbi_data[p["portfolioId"]][gbi_id][
3935 model_name + self._coverage_column_name_format(": rating")
3936 ] = p["rating"]
3937 portfolio_gbi_data[p["portfolioId"]][gbi_id][
3938 model_name + self._coverage_column_name_format(": rating delta")
3939 ] = p["ratingDelta"]
3940
3941 neg_rec: Dict[Any, Dict] = {k: {} for k in gbi_data.keys()}
3942 pos_rec: Dict[Any, Dict] = {k: {} for k in gbi_data.keys()}
3943 for wla in wl_analysis["data"]["watchlistAnalysis"]:
3944 gbi_id = wla["gbiId"]
3945
3946 for pid, signals in zip(portfolio_ids, wla["analysisDates"][0]["portfoliosSignals"]):
3947 model_name = portfolio_info[pid]["modelName"]
3948 neg_rec[gbi_id][
3949 model_name + self._coverage_column_name_format(": negative recommendation")
3950 ] = signals["explainWeightNeg"]
3951 pos_rec[gbi_id][
3952 model_name + self._coverage_column_name_format(": positive recommendation")
3953 ] = signals["explainWeightPos"]
3954
3955 # graphql: GetExcessReturn - slugging pct
3956 er_sp = self._get_excess_return(
3957 model_ids=model_ids, gbi_ids=gbi_ids, asof_date=equity_explorer_date
3958 )
3959
3960 for model in er_sp["data"]["models"]:
3961 model_name = model_info[model["id"]]["modelName"]
3962 for stat in model["equityExplorerData"]["equityExplorerSummaryStatistics"]:
3963 portfolioId = model_info[model["id"]]["id"]
3964 portfolio_gbi_data[portfolioId][int(stat["gbiId"])][
3965 model_name + self._coverage_column_name_format(": slugging %")
3966 ] = (stat["ER"]["SP"]["sixMonthWindowOneMonthHorizon"] * 100)
3967
3968 # add rank, rating, slugging
3969 for pid, v in portfolio_gbi_data.items():
3970 for gbi_id, vv in v.items():
3971 gbi_data[gbi_id].update(vv)
3972
3973 # add neg/pos rec scores
3974 for rec in [neg_rec, pos_rec]:
3975 for k, v in rec.items():
3976 gbi_data[k].update(v)
3977
3978 df = pd.DataFrame.from_records([v for _, v in gbi_data.items()])
3979
3980 return df
3981
3982 def get_coverage_csv(
3983 self, watchlist_id: str, portfolio_group_id: str, filepath: Optional[str] = None
3984 ) -> Optional[str]:
3985 """
3986 Converts the coverage contents to CSV format
3987 Parameters
3988 ----------
3989 watchlist_id: str
3990 UUID str identifying the coverage watchlist
3991 portfolio_group_id: str
3992            UUID str identifying the group of portfolios to use for analysis
3993        filepath: Optional[str]
3994            Path to write the csv output to. If omitted, the csv contents are returned as a string
3995
3996        Returns:
3997        ----------
3998        None if a filepath is provided, else a string with the csv contents
3999 """
4000
4001 df = self.get_coverage_info(watchlist_id, portfolio_group_id)
4002
4003 return df.to_csv(filepath, index=False, float_format="%.4f")
4004
4005 def get_watchlist_details(self, watchlist_id: str) -> Dict:
4006 url = f"{self.base_uri}{ROUTE_PREFIX}{DAL_WATCHLIST_ROUTE}/details/"
4007 headers = {"Authorization": "ApiKey " + self.api_key, "Content-Type": "application/json"}
4008 req_json = {"watchlist_id": watchlist_id}