# Bluesky Registered Users v1.3
# Created for Wikimedia Commons; last edited: 2024-01-15
from datetime import datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from matplotlib.ticker import FixedLocator, StrMethodFormatter
'''
The 'data_array' variable holds the population dataset. Each entry is a single string pairing a day with
a value, formatted as "YYYY-MM-DD, ######", where YYYY-MM-DD is the ISO 8601 date and ###### is the number
of registered users at the end of that day. Note that values after 2023-11-07 were estimated by linear
interpolation between website captures on the Internet Archive, and are rounded to the nearest 500.
'''
# Daily samples in chronological order, one "YYYY-MM-DD, N" string per day.
# Entries from 2023-11-08 onward are multiples of 500.
data_array = [
"2023-05-02, 55570", "2023-05-03, 58119", "2023-05-04, 60909", "2023-05-05, 63362",
"2023-05-06, 65035", "2023-05-07, 66194", "2023-05-08, 67477", "2023-05-09, 69053",
"2023-05-10, 70898", "2023-05-11, 72458", "2023-05-12, 74236", "2023-05-13, 76326",
"2023-05-14, 77472", "2023-05-15, 78972", "2023-05-16, 80328", "2023-05-17, 81682",
"2023-05-18, 83411", "2023-05-19, 84718", "2023-05-20, 85805", "2023-05-21, 86699",
"2023-05-22, 87761", "2023-05-23, 89034", "2023-05-24, 90565", "2023-05-25, 92559",
"2023-05-26, 94625", "2023-05-27, 96395", "2023-05-28, 97927", "2023-05-29, 99408",
"2023-05-30, 101215", "2023-05-31, 102578", "2023-06-01, 103919", "2023-06-02, 105437",
"2023-06-03, 106591", "2023-06-04, 107638", "2023-06-05, 108875", "2023-06-06, 110169",
"2023-06-07, 111989", "2023-06-08, 113673", "2023-06-09, 115868", "2023-06-10, 118019",
"2023-06-11, 120206", "2023-06-12, 122911", "2023-06-13, 124731", "2023-06-14, 128350",
"2023-06-15, 131274", "2023-06-16, 133937", "2023-06-17, 136152", "2023-06-18, 137423",
"2023-06-19, 138780", "2023-06-20, 140016", "2023-06-21, 143093", "2023-06-22, 153451",
"2023-06-23, 157707", "2023-06-24, 159933", "2023-06-25, 161913", "2023-06-26, 164419",
"2023-06-27, 169343", "2023-06-28, 174342", "2023-06-29, 179043", "2023-06-30, 184094",
"2023-07-01, 203365", "2023-07-02, 222026", "2023-07-03, 240687", "2023-07-04, 254189",
"2023-07-05, 262135", "2023-07-06, 271292", "2023-07-07, 278898", "2023-07-08, 284516",
"2023-07-09, 287745", "2023-07-10, 291393", "2023-07-11, 294869", "2023-07-12, 299667",
"2023-07-13, 304946", "2023-07-14, 309949", "2023-07-15, 313683", "2023-07-16, 317209",
"2023-07-17, 321258", "2023-07-18, 324959", "2023-07-19, 328269", "2023-07-20, 331240",
"2023-07-21, 335652", "2023-07-22, 340658", "2023-07-23, 347321", "2023-07-24, 370048",
"2023-07-25, 386014", "2023-07-26, 394662", "2023-07-27, 404693", "2023-07-28, 414773",
"2023-07-29, 423934", "2023-07-30, 433895", "2023-07-31, 442390", "2023-08-01, 450486",
"2023-08-02, 458146", "2023-08-03, 472748", "2023-08-04, 487476", "2023-08-05, 499582",
"2023-08-06, 511605", "2023-08-07, 525141", "2023-08-08, 535769", "2023-08-09, 543286",
"2023-08-10, 550163", "2023-08-11, 556915", "2023-08-12, 563776", "2023-08-13, 571517",
"2023-08-14, 581620", "2023-08-15, 591309", "2023-08-16, 601562", "2023-08-17, 611424",
"2023-08-18, 639033", "2023-08-19, 659173", "2023-08-20, 670671", "2023-08-21, 682208",
"2023-08-22, 692512", "2023-08-23, 703097", "2023-08-24, 723279", "2023-08-25, 742913",
"2023-08-26, 758806", "2023-08-27, 768190", "2023-08-28, 777341", "2023-08-29, 788438",
"2023-08-30, 807753", "2023-08-31, 826957", "2023-09-01, 845340", "2023-09-02, 855268",
"2023-09-03, 865253", "2023-09-04, 875942", "2023-09-05, 887544", "2023-09-06, 898496",
"2023-09-07, 913032", "2023-09-08, 933157", "2023-09-09, 952217", "2023-09-10, 968247",
"2023-09-11, 980472", "2023-09-12, 1001758", "2023-09-13, 1017594", "2023-09-14, 1028614",
"2023-09-15, 1038289", "2023-09-16, 1046587", "2023-09-17, 1055012", "2023-09-18, 1071682",
"2023-09-19, 1125267", "2023-09-20, 1157089", "2023-09-21, 1179696", "2023-09-22, 1197915",
"2023-09-23, 1211413", "2023-09-24, 1223934", "2023-09-25, 1237273", "2023-09-26, 1249557",
"2023-09-27, 1261156", "2023-09-28, 1272657", "2023-09-29, 1294216", "2023-09-30, 1308941",
"2023-10-01, 1324469", "2023-10-02, 1343949", "2023-10-03, 1361891", "2023-10-04, 1377805",
"2023-10-05, 1393473", "2023-10-06, 1410649", "2023-10-07, 1425252", "2023-10-08, 1438074",
"2023-10-09, 1452503", "2023-10-10, 1470776", "2023-10-11, 1489064", "2023-10-12, 1508389",
"2023-10-13, 1529182", "2023-10-14, 1543696", "2023-10-15, 1557027", "2023-10-16, 1570787",
"2023-10-17, 1583493", "2023-10-18, 1618452", "2023-10-19, 1646361", "2023-10-20, 1666281",
"2023-10-21, 1681299", "2023-10-22, 1694865", "2023-10-23, 1709734", "2023-10-24, 1725184",
"2023-10-25, 1738664", "2023-10-26, 1750386", "2023-10-27, 1761839", "2023-10-28, 1772651",
"2023-10-29, 1785693", "2023-10-30, 1799713", "2023-10-31, 1811867", "2023-11-01, 1823445",
"2023-11-02, 1836704", "2023-11-03, 1850723", "2023-11-04, 1863205", "2023-11-05, 1876044",
"2023-11-06, 1890622", "2023-11-07, 1902887", "2023-11-08, 1918000", "2023-11-09, 1935000",
"2023-11-10, 1953000", "2023-11-11, 1973500", "2023-11-12, 1994000", "2023-11-13, 2010500",
"2023-11-14, 2023500", "2023-11-15, 2038000", "2023-11-16, 2052000", "2023-11-17, 2067500",
"2023-11-18, 2085000", "2023-11-19, 2103500", "2023-11-20, 2124000", "2023-11-21, 2144500",
"2023-11-22, 2156000", "2023-11-23, 2170500", "2023-11-24, 2189500", "2023-11-25, 2202500",
"2023-11-26, 2217000", "2023-11-27, 2229000", "2023-11-28, 2252500", "2023-11-29, 2274500",
"2023-11-30, 2287000", "2023-12-01, 2318000", "2023-12-02, 2339500", "2023-12-03, 2364000",
"2023-12-04, 2383500", "2023-12-05, 2402000", "2023-12-06, 2418500", "2023-12-07, 2443500",
"2023-12-08, 2463500", "2023-12-09, 2484000", "2023-12-10, 2493500", "2023-12-11, 2503000",
"2023-12-12, 2510500", "2023-12-13, 2523500", "2023-12-14, 2538000", "2023-12-15, 2552500",
"2023-12-16, 2568000", "2023-12-17, 2585000", "2023-12-18, 2611500", "2023-12-19, 2635000",
"2023-12-20, 2658500", "2023-12-21, 2690000", "2023-12-22, 2712500", "2023-12-23, 2734500",
"2023-12-24, 2755500", "2023-12-25, 2773500", "2023-12-26, 2791000", "2023-12-27, 2805000",
"2023-12-28, 2821500", "2023-12-29, 2838000", "2023-12-30, 2854500", "2023-12-31, 2872000",
"2024-01-01, 2890000", "2024-01-02, 2904500", "2024-01-03, 2917500", "2024-01-04, 2927500",
"2024-01-05, 2935500", "2024-01-06, 2943000", "2024-01-07, 2961000", "2024-01-08, 2988500",
"2024-01-09, 3009500", "2024-01-10, 3017000", "2024-01-11, 3024000", "2024-01-12, 3031500",
"2024-01-13, 3039000", "2024-01-14, 3044000"
]
# Parse every "YYYY-MM-DD, N" entry into two parallel lists: date_rng holds the
# day as a datetime object and y_values holds the user count as an int.
# Each entry is split once at its comma; whitespace around either field is
# discarded before conversion.
_split_rows = [row.partition(",") for row in data_array]
date_rng = [datetime.strptime(day_text.strip(), "%Y-%m-%d") for day_text, _, _ in _split_rows]
y_values = [int(count_text.strip()) for _, _, count_text in _split_rows]
# Build the figure together with its single Axes.
fig, ax = plt.subplots(figsize=(10, 6))
# Draw the user-count line; zorder=2 keeps it above the shaded area (zorder=1).
ax.plot(date_rng, y_values, zorder=2)
# Faint background grid, then a translucent fill beneath the data line.
ax.grid(True, linestyle="-", linewidth=0.4, alpha=0.4)
ax.fill_between(date_rng, y_values, color="skyblue", alpha=0.4, zorder=1)
# Translators - edit these strings to translate into the desired language.
ax.set(
    title="Bluesky - Registered Users",
    xlabel="Date (YYYY-MM)",
    ylabel="Total Registered Users",
)
# Put one major x-axis tick on the first day of each month, labelled as YYYY-MM.
ax.xaxis.set_major_locator(mdates.MonthLocator(bymonthday=1))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
# Make sure y-axis starts at 0.
ax.set_ylim(bottom=0)
# Label y-axis ticks as whole numbers with thousands separators. A formatter is
# used instead of freezing the current tick positions and label strings, so the
# labels keep matching the ticks if the view is zoomed or panned in the window
# opened by plt.show().
ax.yaxis.set_major_formatter(StrMethodFormatter("{x:,.0f}"))
# Hide y-axis offset number (1e6, etc.)
ax.yaxis.offsetText.set_visible(False)
'''
Highlight and label the first data point of each month:
1. Keep a set of the months (as "YYYY-MM") that have already been labelled.
2. Walk through the date-value pairs in order.
3. Skip a point unless it falls on the 1st or its month has not been labelled yet.
4. Draw a scatter dot on the point, layered above the line and fill (zorder=3).
5. Annotate the dot with its comma-formatted value, centred 10 points above it.
6. Record the month so that later points in the same month are skipped.
'''
months = set()
for point_date, point_value in zip(date_rng, y_values):
    month_key = point_date.strftime("%Y-%m")
    if point_date.day != 1 and month_key in months:
        continue
    ax.scatter(point_date, point_value, color='C0', marker='o', alpha=1, zorder=3)
    ax.annotate(f'{point_value:,}', (point_date, point_value), textcoords="offset points", xytext=(0, 10), ha='center')
    months.add(month_key)
# Adjust the figure layout for visibility, then display the result.
fig.tight_layout()
plt.show()