Update with latest finance projections (#42)

2i2c-org · Aug 21, 2024 · a5357f1 · a5357f1
1 parent 8103e63
commit a5357f1
Show file tree

Hide file tree

Showing 6 changed files with 117 additions and 34 deletions.
diff --git a/book/cloud.md b/book/cloud.md
@@ -5,7 +5,7 @@ jupytext:
     extension: .md
     format_name: myst
     format_version: 0.13
-    jupytext_version: 1.16.2
+    jupytext_version: 1.16.4
 kernelspec:
   display_name: Python 3 (ipykernel)
   language: python
@@ -252,8 +252,28 @@ communities['lon_jitter'] = communities['lon'].map(lambda a: a + np.random.norma
 ```
 
 ```{code-cell} ipython3
+---
+editable: true
+slideshow:
+  slide_type: ''
+tags: [remove-cell]
+---
 def update_geo_fig(fig):
-    fig.update_geos(bgcolor=colors["paleblue"], landcolor="white", showland=True)
+    """Modify the style of a geo plot for 2i2c branding."""
+    fig.update_geos(oceancolor=colors["paleblue"], landcolor="white", subunitcolor="grey", bgcolor='rgba(0,0,0,0)', showland=True, showocean=True)
+    fig.update_layout(paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)"
+)
+
+def update_png_fig(fig):
+    """Update a plot for printing to a PNG."""
+    # Set minimum marker size
+    fig.update_traces(
+        marker=dict(
+            sizemin=10,
+        )
+    )
+    # Remove margin on PNG exports
+    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
 ```
 
 ```{code-cell} ipython3
@@ -307,6 +327,7 @@ path_maps = Path("_static/maps/")
 path_maps.mkdir(parents=True, exist_ok=True)
 path_file = path_maps / f"2i2c_hubs_map.png"
 update_geo_fig(fig)
+update_png_fig(fig)
 fig.write_image(path_file)
 
 # Output for the cell
@@ -323,6 +344,7 @@ for constellation, idata in communities.groupby("Constellation"):
     display(Markdown(f"Constellation: **{constellation}**"))
     display(Markdown(f"Permanent link: {{download}}`2i2c.org/kpis{path_file} <{path_file}>`"))
     update_geo_fig(fig)
+    update_png_fig(fig)
     fig.show("png")
     fig.write_image(path_file)
 ```

diff --git a/book/finances.md b/book/finances.md
@@ -5,7 +5,7 @@ jupytext:
     extension: .md
     format_name: myst
     format_version: 0.13
-    jupytext_version: 1.16.3
+    jupytext_version: 1.16.4
 kernelspec:
   display_name: Python 3 (ipykernel)
   language: python
@@ -14,9 +14,9 @@ kernelspec:
 
 +++ {"editable": true, "slideshow": {"slide_type": ""}}
 
-# Contracts projections
+# Revenue projections
 
-This document shows 2i2c's historical contract data, and predicts 2i2c's monthly income along with its costs using data from our [Leads AirTable](https://airtable.com/appbjBTRIbgRiElkr/tblmRU6U53i8o7z2I/viw8xzzSXk8tPwBho?blocks=hide), which also pulls in data from our [Contracts AirTable](https://airtable.com/appbjBTRIbgRiElkr/tbliwB70vYg3hlkb1/viwWPJhcFbXUJZUO6?blocks=hide).
+This document shows 2i2c's historical revenue data by contract, and predicts 2i2c's monthly income along with its costs using data from our [Leads AirTable](https://airtable.com/appbjBTRIbgRiElkr/tblmRU6U53i8o7z2I/viw8xzzSXk8tPwBho?blocks=hide), which also pulls in data from our [Contracts AirTable](https://airtable.com/appbjBTRIbgRiElkr/tbliwB70vYg3hlkb1/viwWPJhcFbXUJZUO6?blocks=hide).
 
 When built via Jupyter Book, all leads are anonymized.
 If you want de-anonymized leads, run the notebook locally.
@@ -70,6 +70,7 @@ import plotly_express as px
 
 # Apply 2i2c default styles
 import twoc
+from twoc.dates import round_to_nearest_month
 from IPython.display import Markdown
 from itables import show as ishow
 from plotly.graph_objects import Figure
@@ -155,24 +156,59 @@ column_mappings = {
     "Contract Type": "Contract Type",
     # The type of service
     "Engagement Type": "Engagement Type",
-    # GitHub issue
-    "Issue": "Issue",
 }
 leads = pd.read_csv("./data/airtable-leads.csv", usecols=column_mappings.keys())
 leads = leads.rename(columns=column_mappings)
 ```
 
+```{code-cell} ipython3
+---
+editable: true
+slideshow:
+  slide_type: ''
+tags: [remove-cell]
+---
+# Read in latest fundraising data from AirTable
+column_mappings = {
+    # Unique name
+    "Name": "Name",
+    # Status of the lead
+    "Status": "Status",
+    # The total amount for 2i2c after subtracting the FSP fee
+    "2i2c spendable amount": "Amount for 2i2c",
+    # The chance that we'll get this award
+    "% probability of success": "% success",
+    # The start date of the contract or the lead depending on what's there
+    "Start Date (final)": "Start Date",
+    # The end date of the contract or the lead depending on what's there
+    "End Date (final)": "End Date",
+}
+fundraising = pd.read_csv("./data/airtable-fundraising.csv", usecols=column_mappings.keys())
+fundraising = fundraising.rename(columns=column_mappings)
+
+# Quick clean up
+fundraising["Contract Type"] = "Core Funding"
+fundraising["Engagement Type"] = "Core Funding"
+fundraising = fundraising.replace({"Ask": "Prospect", "Cultivate": "Prospect"})
+fundraising = fundraising.query("`% success` > 0")
+fundraising["% success"] /= 100.
+```
+
 ```{code-cell} ipython3
 ---
 editable: true
 slideshow:
   slide_type: ''
 tags: [remove-input]
 ---
+# Concatenate them so that we can analyze them together
+leads = pd.concat([leads, fundraising])
+
 # Anonymize leads if we are in a CI/CD environment because this will be public
 if "GITHUB_ACTION" in os.environ:
     for ix, name in leads["Name"].items():
         leads.loc[ix, "Name"] = f"Lead {ix}"
+
 leads.head().style.set_caption("Sample leads from our Leads AirTable.")
 ```
 
@@ -199,7 +235,7 @@ missing_amount_for_2i2c = ~leads.eval("`Amount for 2i2c` > 0")
 
 # Don't worry about the % success column in case it's missing
 missing_values = (
-    leads.drop(columns=["% success", "Issue"]).isnull().apply(lambda a: any(a), axis=1)
+    leads.drop(columns=["% success"]).isnull().apply(lambda a: any(a), axis=1)
 )
 leads_to_remove = missing_amount_for_2i2c | missing_values
 leads_to_remove = leads_to_remove[leads_to_remove == True].index
@@ -234,16 +270,18 @@ for ix, irow in leads.iterrows():
     # If it's awarded then skip it because we're only marking prospectives
     if "Awarded" in irow["Status"]:
         continue
-    if irow["Status"] == "Renewal":
+    if irow["Status"].lower() == "renewal":
         leads.loc[ix, "Contract Type"] = "Projected renewal"
         leads.loc[ix, "Engagement Type"] = "Projected renewal"
+    elif irow["Status"].lower() == "needs admin":
+        leads.loc[ix, "Contract Type"] = "Needs admin"
+        leads.loc[ix, "Engagement Type"] = "Needs admin"
+    elif irow["Engagement Type"].lower() == "core funding":
+        leads.loc[ix, "Contract Type"] = "Projected core funding"
+        leads.loc[ix, "Engagement Type"] = "Projected core funding"
     else:
-        if irow["Engagement Type"] == "Core funding":
-            leads.loc[ix, "Contract Type"] = "Projected core funding"
-            leads.loc[ix, "Engagement Type"] = "Projected core funding"
-        else:
-            leads.loc[ix, "Contract Type"] = "Projected new contract"
-            leads.loc[ix, "Engagement Type"] = "Projected new contract"
+        leads.loc[ix, "Contract Type"] = "Projected new contract"
+        leads.loc[ix, "Engagement Type"] = "Projected new contract"
 ```
 
 +++ {"editable": true, "slideshow": {"slide_type": ""}}
@@ -295,15 +333,6 @@ slideshow:
   slide_type: ''
 tags: [remove-cell]
 ---
-def round_to_nearest_month(date):
-    start_of_current_month = pd.to_datetime(f"{date.year}-{date.month}")
-    start_of_next_month = date + pd.offsets.MonthBegin()
-    if date.day < 15:
-        return start_of_current_month
-    else:
-        return start_of_next_month
-
-
 # Convert date columns to DateTime objects
 date_cols = ["Start Date", "End Date"]
 for col in date_cols:
@@ -430,6 +459,7 @@ colors = {
     "Core funding": twoc.colors["bigblue"],
     "Partnership": twoc.colors["mauve"],
     "Hub service": twoc.colors["coral"],
+    "Needs admin": "#ffa8a9",
     "Projected renewal": "grey",
     "Projected core funding": "darkgrey",
     "Projected new contract": "lightgrey",
@@ -461,6 +491,8 @@ write_image(figservice, "_build/images/service_type.png")
 figservice
 ```
 
++++ {"editable": true, "slideshow": {"slide_type": ""}}
+
 ## Budget projections
 
 Now we project into the future by including our **potential leads** as well.
@@ -484,15 +516,20 @@ date_future = round_to_nearest_month(today + datetime.timedelta(days=30 * 12))
 qu_date = f"Date >= '{date_past:%Y-%m-%d}' and Date <= '{date_future:%Y-%m-%d}'"
 
 for iname in ["Monthly amount (weighted)", "Monthly amount"]:
+    # Bar plot of revenue
+    data_plot = amortized_records.query(qu_date)
+    if iname == "Monthly amount":
+        # If we are plotting the unweighted monthly amount, only use records with a > 25% chance of success
+        data_plot = data_plot.query("`% success` > .25")
+
     figservice = px.bar(
-        amortized_records.query(qu_date),
+        data_plot,
         x="Date",
         y=iname,
         color="Engagement Type",
         category_orders={"Engagement Type": colors.keys()},
         color_discrete_map=colors,
         hover_name="Name",
-        # hover_data=["Monthly amount", "Monthly amount (weighted)", "Total amount", "% success"],
         hover_data={
             "Monthly amount": ":$,.0f",
             "Monthly amount (weighted)": ":$,.0f",
@@ -502,10 +539,12 @@ for iname in ["Monthly amount (weighted)", "Monthly amount"]:
         title=(
             "Monthly Revenue (weighted)"
             if "weighted" in iname
-            else "Monthly Revenue (best case scenario)"
+            else "Monthly Revenue if contracts over 50% chance are awarded"
         ),
     )
     figservice.update_traces(marker_line_width=0.2)
+
+    # Dotted line plot of costs
     figservice.add_scatter(
         x=costs.query(qu_date)["Date"],
         y=costs.query(qu_date)["Monthly cost (no FSP)"],

diff --git a/book/scripts/download_airtable_data.py b/book/scripts/download_airtable_data.py
@@ -38,6 +38,7 @@
     ("accounting", "appbjBTRIbgRiElkr", "tblNjmVbPaVmC7wc3", "viw1daKSu2dTcd5lg"),
     ("contracts", "appbjBTRIbgRiElkr", "tbliwB70vYg3hlkb1", "viwWPJhcFbXUJZUO6"),
     ("leads", "appbjBTRIbgRiElkr", "tblmRU6U53i8o7z2I", "viw8xzzSXk8tPwBho"),
+    ("fundraising", "appbjBTRIbgRiElkr", "tblM9Fv7J4Nl4c0L3", "viwtUpulzqMmqkxsJ"),
 ]
 ## Load in airtable
 api = Api(api_key)

diff --git a/book/twoc/dates.py b/book/twoc/dates.py
@@ -0,0 +1,16 @@
+"""Date utilities for 2i2c."""
+import pandas as pd
+
+def round_to_nearest_month(date):
+    """
+    Round a date to the start day of the nearest month.
+    
+    This helps us avoid under-counting months when a start date is the 1st
+    and the end date is the 31st.
+    """
+    start_of_current_month = pd.to_datetime(f"{date.year}-{date.month}")
+    start_of_next_month = start_of_current_month + pd.offsets.MonthBegin()
+    if date.day < 15:
+        return start_of_current_month
+    else:
+        return start_of_next_month
diff --git a/book/upstream.md b/book/upstream.md
@@ -5,7 +5,7 @@ jupytext:
     extension: .md
     format_name: myst
     format_version: 0.13
-    jupytext_version: 1.16.3
+    jupytext_version: 1.16.4
 kernelspec:
   display_name: Python 3 (ipykernel)
   language: python
@@ -101,7 +101,7 @@ for _, row in data.iterrows():
     iicomments = pd.DataFrame(literal_eval(row["comments"]))
     if iicomments.shape[0] > 0:
         iicomments["author"] = iicomments["author"].map(lambda a: a["login"] if a is not None else None)
-        iicomments[["org", "repo"]] = row[["org", "repo"]]
+        iicomments.loc[:, ["org", "repo"]] = row[["org", "repo"]].tolist()
         new_comments.append(iicomments)
 comments = pd.concat(new_comments)
 
@@ -316,7 +316,7 @@ tags: [remove-input]
 ---
 issues = data.loc[["issues/" in ii for ii in data["url"].values]]
 issuesByUs = issues.dropna(subset="createdAt").query("author in @team")
-visualize_over_time(issuesByUs, on="closedAt", title="Issues opened by a team member, over time")
+visualize_over_time(issuesByUs, on="updatedAt", title="Issues opened by a team member, over time")
 ```
 
 +++ {"editable": true, "slideshow": {"slide_type": ""}}
@@ -343,9 +343,14 @@ visualize_by_org_repo(issuesByUs, "Issues opened by a team member, by repository
 
 Comments are a reflection of where we're participating in conversations, discussions, brainstorming, guiding others, etc. They are a reflection of "overall activity" because comments tend to happen everywhere, and may not be associated with a specific change to the code.
 
-+++ {"editable": true, "slideshow": {"slide_type": ""}}
-
+```{code-cell} ipython3
+---
+editable: true
+slideshow:
+  slide_type: ''
+---
 visualize_over_time(comments, title="Comments made by a team member, over time")
+```
 
 +++ {"editable": true, "slideshow": {"slide_type": ""}}
 

diff --git a/requirements.txt b/requirements.txt
@@ -22,13 +22,13 @@ tomlkit
 vega_datasets
 
 # For developing
-jupyterlab-code-formatter
 black
 isort
 itables
 jupyterlab
-jupytext
+jupyterlab-code-formatter
 jupyterlab-myst
+jupytext
 
 # For loading grafana data
 python-dotenv