From 6a944c5d5e00513f0aff00c05c9168835736a6a6 Mon Sep 17 00:00:00 2001 From: Silke Nodwell Date: Sat, 24 Jan 2026 21:19:56 +0000 Subject: [PATCH 1/5] Small bug fix so that the --- is on the first line --- tools/blog_automation/doc_to_html_conversion.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/blog_automation/doc_to_html_conversion.py b/tools/blog_automation/doc_to_html_conversion.py index 3d9cc987..b8efd69c 100644 --- a/tools/blog_automation/doc_to_html_conversion.py +++ b/tools/blog_automation/doc_to_html_conversion.py @@ -9,8 +9,7 @@ # --- Configuration --- SERVICE_ACCOUNT_FILE = 'service_account_key.json' -YAML_HEADER = ''' ---- +YAML_HEADER = '''--- layout: post title: [TITLE] date: [DATE] From b62cc3c33c911b7b71e31db0678d87424cbf1731 Mon Sep 17 00:00:00 2001 From: Silke Nodwell Date: Sun, 25 Jan 2026 14:58:13 +0000 Subject: [PATCH 2/5] Update blog post html with info from spreadsheet --- .../blog_automation/doc_to_html_conversion.py | 106 ++++++- tools/blog_automation/requirements.txt | 259 +++++++++++++++++- 2 files changed, 351 insertions(+), 14 deletions(-) diff --git a/tools/blog_automation/doc_to_html_conversion.py b/tools/blog_automation/doc_to_html_conversion.py index b8efd69c..7f8c6cc7 100644 --- a/tools/blog_automation/doc_to_html_conversion.py +++ b/tools/blog_automation/doc_to_html_conversion.py @@ -1,3 +1,4 @@ +import shutil from google.oauth2 import service_account from googleapiclient.discovery import build import os @@ -6,9 +7,12 @@ from pathlib import Path from googleapiclient.errors import HttpError import datetime as dt +import pandas as pd +import re # --- Configuration --- SERVICE_ACCOUNT_FILE = 'service_account_key.json' +SPREADSHEET_ID = '1Pje2qOn23OgtAyhjqKwQFYcaEAE3gAy5f3T_5LCgA2o' YAML_HEADER = '''--- layout: post title: [TITLE] @@ -25,6 +29,22 @@ def _current_directory(): return Path(__file__).resolve().parent +def drive_connection(): + service_account_path = os.path.join(_current_directory(), 
SERVICE_ACCOUNT_FILE) + if not os.path.exists(service_account_path): + print(f"ERROR: Service account key file '{service_account_path}' not found.\n" + "Please obtain your own Google service account key and place it at this path.\n" + "(Never commit this file to version control.)") + exit(1) + creds = service_account.Credentials.from_service_account_file( + service_account_path, + scopes=['https://www.googleapis.com/auth/drive.readonly'] + ) + drive = build('drive', 'v3', credentials=creds) + return drive + +drive = drive_connection() + def _posts_directory(): # Path to the directory where the script itself is located script_dir = _current_directory() @@ -42,20 +62,49 @@ def _create_blog_filename_with_date(doc_name, date_str): filename = f"{date_str}-{formatted_blog_title}" return filename -def export_blog_as_html(document_id, date=None): +def _get_info_from_spreadsheet(drive=drive, spreadsheet_id=SPREADSHEET_ID): + import gspread + import pandas as pd + + # 1) Authenticate using the service account JSON + gc = gspread.service_account(filename="service_account_key.json") + + # 2) Open the spreadsheet by its ID + spreadsheet_id = SPREADSHEET_ID + sh = gc.open_by_key(spreadsheet_id) + + # 3) Select a worksheet/tab (by gid or title) + worksheet = sh.worksheet("Form Responses 1") + + # 4) Get data + data = worksheet.get_all_records() + + # 5) Convert to a pandas DataFrame + df = pd.DataFrame(data) + return df + +def _update_yaml_header_with_spreadsheet_info(yaml_header): + spreadsheet_info = _get_info_from_spreadsheet(drive=drive).iloc[-1].to_dict() + try: + author_name = spreadsheet_info['What is your full name? '] + author_role = spreadsheet_info[ + 'What is your position / company you are working at / associated with? ' + ] + description = spreadsheet_info['Please provide a short description of your writing idea / blog post? '] + source = spreadsheet_info[ + 'Please provide a source of how you obtained/created the infographic/photo/picture used.' 
+ ] + yaml_header = yaml_header.replace('[AUTHOR]', author_name) + yaml_header = yaml_header.replace('[AUTHOR ROLE]', author_role) + yaml_header = yaml_header.replace('[DESCRIPTION]', description) + yaml_header = yaml_header.replace('[SOURCE]', source) + return yaml_header + except KeyError as error: + print(f'Unable to find relevant spreadsheet field. Please check the spreadsheet carefully.\n{error}') + +def export_blog_as_html(document_id, spreadsheet_info, date=None, drive=drive): if date is None: date = _today_date_str() - service_account_path = os.path.join(_current_directory(), SERVICE_ACCOUNT_FILE) - if not os.path.exists(service_account_path): - print(f"ERROR: Service account key file '{service_account_path}' not found.\n" - "Please obtain your own Google service account key and place it at this path.\n" - "(Never commit this file to version control.)") - exit(1) - creds = service_account.Credentials.from_service_account_file( - service_account_path, - scopes=['https://www.googleapis.com/auth/drive.readonly'] - ) - drive = build('drive', 'v3', credentials=creds) try: # 1. 
Get document name from Drive @@ -89,6 +138,7 @@ def export_blog_as_html(document_id, date=None): # YAML front matter yaml_header = YAML_HEADER.replace('[TITLE]', doc_name.title()).replace('[DATE]', date) + yaml_header = _update_yaml_header_with_spreadsheet_info(yaml_header) final_html = yaml_header + '\n' + html_body @@ -98,6 +148,38 @@ def export_blog_as_html(document_id, date=None): f.write(final_html) print(f"Saved HTML to: {filename}") + return blog_filename + +def download_blog_image(spreadsheet_info): + blog_image_drive_link = spreadsheet_info['Submit your blog cover image'] + file_id = re.search(r'drive\.google\.com/file/d/([^/]+)/', blog_image_drive_link).group(1) + # Download the image file + try: + request = drive.files().get_media(fileId=file_id) + image_data = request.execute() + # Save the image locally + image_filename = f"blog_image_{file_id}.jpg" + with open(image_filename, 'wb') as img_file: + img_file.write(image_data) + return image_filename + except HttpError as error: + print(f"Error downloading image: {error}") + return None + +def copy_image_to_blog_assets(image_filename, blog_filename): + assets_dir = Path(__file__).resolve().parent.parent.parent / 'assets' / 'images' / 'blog' + assets_dir.mkdir(parents=True, exist_ok=True) + date_prefix = blog_filename.split('-')[0] + new_image_filename = f"{date_prefix}-{image_filename}" + new_image_path = assets_dir / new_image_filename + shutil.copy(image_filename, new_image_path) + return f"/assets/images/blog/{new_image_filename}" + +def export_blog_with_image(document_id): + spreadsheet_info = _get_info_from_spreadsheet(drive=drive) + blog_filename = export_blog_as_html(document_id, spreadsheet_info) + image_filename = download_blog_image(spreadsheet_info) + copy_image_to_blog_assets(image_filename, blog_filename) if __name__ == "__main__": # To run script: `python export_blog.py --date ` diff --git a/tools/blog_automation/requirements.txt b/tools/blog_automation/requirements.txt index 
fa7d8270..fcc2a6e4 100644 --- a/tools/blog_automation/requirements.txt +++ b/tools/blog_automation/requirements.txt @@ -1,23 +1,278 @@ +affine @ file:///home/conda/feedstock_root/build_artifacts/affine_1733762038348/work +aiohappyeyeballs @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_86beu757ao/croot/aiohappyeyeballs_1755769847762/work +aiohttp @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_44bhte2f2d/croot/aiohttp_1734692700992/work +aiosignal @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_e3cjtbh902/croot/aiosignal_1755870814211/work +altair==5.5.0 +anaconda-anon-usage @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_3eler6mjxh/croot/anaconda-anon-usage_1710965076906/work +anaconda-client @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_4fl23009pr/croot/anaconda-client_1708640644054/work +anaconda-cloud-auth @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_bazegf935a/croot/anaconda-cloud-auth_1713991395391/work +anaconda-navigator @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_acxcfb858r/croot/anaconda-navigator_1719499393980/work +anaconda-project @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_6ddp1qj6b_/croots/recipe/anaconda-project_1660339893712/work +annotated-types==0.7.0 +anyio @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_a17a7759g2/croot/anyio_1706220182417/work +appnope @ file:///Users/ktietz/ci_310/appnope_1643965056645/work +archspec @ file:///croot/archspec_1709217642129/work +argon2-cffi @ file:///opt/conda/conda-bld/argon2-cffi_1645000214183/work +argon2-cffi-bindings @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/croot-wbf5edig/argon2-cffi-bindings_1644845754377/work +asttokens @ file:///opt/conda/conda-bld/asttokens_1646925590279/work +async-lru @ 
file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_02efro5ps8/croot/async-lru_1699554529181/work +async-timeout @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_0fagvkecl5/croot/async-timeout_1732662297241/work +attrs==25.3.0 +Babel @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_00k1rl2pus/croot/babel_1671781944131/work +backports.functools-lru-cache @ file:///tmp/build/80754af9/backports.functools_lru_cache_1618170165463/work +backports.tempfile @ file:///home/linux1/recipes/ci/backports.tempfile_1610991236607/work +backports.weakref==1.0.post1 +beautifulsoup4 @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_94rx5n7wo9/croot/beautifulsoup4-split_1718029832430/work +bleach @ file:///opt/conda/conda-bld/bleach_1641577558959/work +blinker==1.9.0 +blis==1.3.0 +boltons @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_f63n9uulmp/croot/boltons_1677628710094/work +Bottleneck @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_2bxpizxa3c/croot/bottleneck_1707864819812/work +branca @ file:///Users/ec2-user/croot/branca_1767093573661/work +Brotli @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_27zk0eqdh0/croot/brotli-split_1714483157007/work cachetools==5.5.2 -certifi==2025.8.3 +Cartopy @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_5bk2zu8mcq/croot/cartopy_1741279427363/work +catalogue==2.0.10 +certifi @ file:///Users/ec2-user/croot/certifi_1767659142737/work/certifi +cffi @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_7a9c7wyorr/croot/cffi_1714483157752/work +chardet @ file:///Users/ktietz/ci_310/chardet_1643965356347/work charset-normalizer==3.4.2 +click==8.2.1 +click-plugins @ file:///home/conda/feedstock_root/build_artifacts/click-plugins_1750848229740/work +cligj @ file:///home/conda/feedstock_root/build_artifacts/cligj_1733749956636/work +cloudpathlib==0.21.1 +clyent==1.2.2 +comm @ 
file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_3doui0bmzb/croot/comm_1709322861485/work +conda @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_3b7pmnimud/croot/conda_1754469522029/work/conda-src +conda-build @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_e96dm09bk9/croot/conda-build_1716991294552/work +conda-content-trust @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_9fusbfzixa/croot/conda-content-trust_1714483157715/work +conda-libmamba-solver @ file:///croot/conda-libmamba-solver_1745607008911/work/src +conda-pack @ file:///tmp/build/80754af9/conda-pack_1611163042455/work +conda-package-handling @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_ef9phnqphe/croot/conda-package-handling_1718138279942/work +conda-repo-cli @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_d9sayfhoiy/croot/conda-repo-cli_1709246580063/work +conda-token @ file:///croot/conda-token_1718995751285/work +conda-verify==3.4.2 +conda_index @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_0au56q2_5k/croot/conda-index_1719338215248/work +conda_package_streaming @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_6dgq200203/croot/conda-package-streaming_1718136087190/work +confection==0.1.5 +contextily @ file:///home/conda/feedstock_root/build_artifacts/contextily_1764021777332/work +contourpy==1.3.2 +cryptography @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_31zgxm62w8/croot/cryptography_1714660690857/work +cycler @ file:///Users/ec2-user/croot/cycler_1766067937639/work +cymem==2.0.11 +datasets @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_62tsfo91_6/croot/datasets_1741368007051/work +debugpy @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_563_nwtkoc/croot/debugpy_1690905063850/work +decorator @ file:///opt/conda/conda-bld/decorator_1643638310831/work 
+defusedxml @ file:///tmp/build/80754af9/defusedxml_1615228127516/work +dill @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_28zwy_olqk/croot/dill_1715094676263/work +distro @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_ddkyz0575y/croot/distro_1714488254309/work +dotenv==0.9.9 +en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl +exceptiongroup @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_b2258scr33/croot/exceptiongroup_1706031391815/work +executing @ file:///opt/conda/conda-bld/executing_1646925071911/work +faiss==1.9.0 +fastjsonschema @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_43a0jaiddu/croots/recipe/python-fastjsonschema_1661368628129/work +filelock @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_d3quwmvouf/croot/filelock_1700591194006/work +folium @ file:///opt/miniconda3/conda-bld/folium_1758749951921/work +fonttools @ file:///opt/miniconda3/conda-bld/fonttools_1765445198468/work +frozendict @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_8b0cest_id/croot/frozendict_1713194839836/work +frozenlist @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_6eeg1bc_1e/croot/frozenlist_1730902809317/work +fsspec @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_2197in1_5l/croot/fsspec_1736274678041/work +future @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_14jxr8efxg/croot/future_1677599893084/work +geographiclib @ file:///home/conda/feedstock_root/build_artifacts/geographiclib_1755865096708/work +geopandas @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_8b70v8i18p/croot/geopandas-split_1755761507058/work +geoplot @ file:///home/conda/feedstock_root/build_artifacts/geoplot_1736335046714/work +geopy @ 
file:///home/conda/feedstock_root/build_artifacts/geopy_1734341931581/work +gitdb==4.0.12 +GitPython==3.1.45 +glob2 @ file:///home/linux1/recipes/ci/glob2_1610991677669/work +gmpy2 @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_51391juwln/croot/gmpy2_1738085477864/work google-api-core==2.25.1 google-api-python-client==2.177.0 google-auth==2.40.3 google-auth-httplib2==0.2.0 google-auth-oauthlib==1.2.2 googleapis-common-protos==1.70.0 +gspread==6.2.0 +gspread-dataframe==4.0.0 +h11==0.16.0 +hf-xet @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_79w069vczw/croot/hf-xet_1755725955657/work +httpcore==1.0.9 httplib2==0.22.0 +httpx==0.28.1 +huggingface_hub @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_29dc5yixdo/croot/huggingface_hub_1755770856782/work idna==3.10 +importlib-metadata @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_5498c88e7n/croot/importlib_metadata-suite_1704813534254/work +ipykernel @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_f428_5tjvx/croot/ipykernel_1705933835534/work +ipython @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_f02dzd_ff3/croot/ipython_1718287987043/work +jaraco.classes @ file:///tmp/build/80754af9/jaraco.classes_1620983179379/work +jedi @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_194648shy3/croot/jedi_1721058355221/work +Jinja2==3.1.6 +jiter==0.9.0 +joblib @ file:///opt/miniconda3/conda-bld/joblib_1757926350579/work +json5 @ file:///tmp/build/80754af9/json5_1624432770122/work +jsonpatch @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_3ajyoz8zoj/croot/jsonpatch_1714483362270/work +jsonpointer==2.1 +jsonschema==4.25.0 +jsonschema-specifications==2025.4.1 +jupyter-events @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_db0avcjzq5/croot/jupyter_events_1718738111427/work +jupyter-lsp @ 
file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_ae9br5v37x/croot/jupyter-lsp-meta_1699978259353/work +jupyter_client @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_58w2siozyz/croot/jupyter_client_1699455907045/work +jupyter_core @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_73nomeum4p/croot/jupyter_core_1718818302815/work +jupyter_server @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_d1t69bk94b/croot/jupyter_server_1718827086930/work +jupyter_server_terminals @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_e7ryd60iuw/croot/jupyter_server_terminals_1686870731283/work +jupyterlab @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_e2r14w4wga/croot/jupyterlab_1706802597734/work +jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work +jupyterlab_server @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_315a64u22w/croot/jupyterlab_server_1699555438434/work +keyring @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_8fybah8hcr/croot/keyring_1709632516643/work +kiwisolver @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_cc2l_z_0ri/croot/kiwisolver_1737039586949/work +langcodes==3.5.0 +language_data==1.3.0 +libarchive-c @ file:///tmp/build/80754af9/python-libarchive-c_1617780486945/work +libmambapy @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_8fxbc14zt9/croot/mamba-split_1734469521691/work/libmambapy +mapclassify @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_eehfxr7d38/croot/mapclassify_1675157736601/work +marisa-trie==1.2.1 Markdown==3.8.2 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +matplotlib==3.10.1 +matplotlib-inline @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_f6fdc0hldi/croots/recipe/matplotlib-inline_1662014472341/work +mdurl==0.1.2 +menuinst @ 
file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_7dz63k6mur/croot/menuinst_1718132539961/work +mercantile @ file:///home/conda/feedstock_root/build_artifacts/mercantile_1734075348980/work +mistune @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_17ya6k1sbs/croots/recipe/mistune_1661496228719/work +more-itertools @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_e2687gwsdv/croot/more-itertools_1700662157766/work +mpmath @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_17iu6a8a3m/croot/mpmath_1690848269369/work +multidict @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_de95nfn702/croot/multidict_1730905502686/work +multiprocess @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_5fxbcbjtcd/croot/multiprocess_1692294387834/work +murmurhash==1.0.13 +narwhals==2.0.1 +navigator-updater @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_15xz58jgev/croot/navigator-updater_1718030392983/work +nbclient @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_626hpwnurm/croot/nbclient_1698934218848/work +nbconvert @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_caxv2emy33/croot/nbconvert_1699022756174/work +nbformat @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_cbnf5nccgk/croot/nbformat_1694616744196/work +nest-asyncio @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_310vb5e2a0/croot/nest-asyncio_1708532678212/work +networkx @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_e6lt1dcyid/croot/networkx_1737040482044/work +notebook_shim @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_d6_ze10f45/croot/notebook-shim_1699455897525/work +numexpr @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_45yefq0kt6/croot/numexpr_1696515289183/work +numpy==1.26.4 +oauth2client==4.1.3 oauthlib==3.3.1 +openai==1.77.0 
+opentelemetry-api @ file:///opt/miniconda3/conda-bld/opentelemetry-api_1758530905861/work +overrides @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_70s80guh9g/croot/overrides_1699371144462/work +packaging==25.0 +pandas==2.3.1 +pandocfilters @ file:///opt/conda/conda-bld/pandocfilters_1643405455980/work +parso @ file:///opt/conda/conda-bld/parso_1641458642106/work +pexpect @ file:///tmp/build/80754af9/pexpect_1605563209008/work +pillow==11.3.0 +pkce @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_da285fiplp/croot/pkce_1690384839054/work +pkginfo @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_21aly_cba3/croot/pkginfo_1715695988648/work +platformdirs @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_a8u4fy8k9o/croot/platformdirs_1692205661656/work +pluggy @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_croot-w6jyveby/pluggy_1648109277227/work +ply==3.11 +preshed==3.0.10 +prometheus-client @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_25sgeyk0j5/croots/recipe/prometheus_client_1659455103277/work +prompt-toolkit @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_c63v4kqjzr/croot/prompt-toolkit_1704404354115/work +propcache @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_a337s3hyzy/croot/propcache_1744012732470/work proto-plus==1.26.1 protobuf==6.31.1 +psutil @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_1310b568-21f4-4cb0-b0e3-2f3d31e39728k9coaga5/croots/recipe/psutil_1656431280844/work +ptyprocess @ file:///tmp/build/80754af9/ptyprocess_1609355006118/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl +pure-eval @ file:///opt/conda/conda-bld/pure_eval_1646925070566/work +pyarrow==21.0.0 pyasn1==0.6.1 pyasn1_modules==0.4.2 -pyparsing==3.2.3 +pycosat @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_19qelmdbl6/croot/pycosat_1714510743067/work +pycparser @ 
file:///tmp/build/80754af9/pycparser_1636541352034/work +pydantic==2.11.7 +pydantic_core==2.33.2 +pydeck==0.9.1 +Pygments==2.19.2 +PyJWT @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_e55dbbf3h7/croot/pyjwt_1715094746385/work +pyogrio @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_fcc0mt59ed/croot/pyogrio_1741107184691/work +pyOpenSSL @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_94bn0tgaw5/croot/pyopenssl_1708381744097/work +pyparsing @ file:///opt/miniconda3/conda-bld/pyparsing_1763973787496/work +pyproj @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_80acrjskf3/croot/pyproj_1739284804477/work +PyQt5==5.15.10 +PyQt5-sip @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_7c_9f71up5/croot/pyqt-split_1698769091879/work/pyqt_sip +PyQtWebEngine==5.15.6 +pyshp @ file:///home/conda/feedstock_root/build_artifacts/pyshp_1764355200360/work +PySocks @ file:///Users/ktietz/ci_310/pysocks_1643961536721/work +python-dateutil @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_66ud1l42_h/croot/python-dateutil_1716495741162/work +python-dotenv @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_3ahu1_p9lb/croot/python-dotenv_1669132571851/work +python-json-logger @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_c3baq2ko4j/croot/python-json-logger_1683823815343/work +pytz==2025.2 +PyYAML @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_a8_sdgulmz/croot/pyyaml_1698096054705/work +pyzmq @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_43pxpbos3z/croot/pyzmq_1705605108344/work +QtPy @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_7ctc7lxlar/croot/qtpy_1700144861044/work +rasterio @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_bc_alnxcvr/croot/rasterio_1740070391819/work +referencing==0.36.2 +regex @ 
file:///opt/miniconda3/conda-bld/regex_1758887993323/work requests==2.32.4 requests-oauthlib==2.0.0 +requests-toolbelt @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_3fee1fr2ex/croot/requests-toolbelt_1690874011813/work +rfc3339-validator @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_76ae5cu30h/croot/rfc3339-validator_1683077051957/work +rfc3986-validator @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_d0l5zd97kt/croot/rfc3986-validator_1683058998431/work +rich==14.1.0 +rpds-py==0.26.0 rsa==4.9.1 +ruamel-yaml-conda @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_ffli8pikr0/croot/ruamel_yaml_1667489741108/work +ruamel.yaml @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_aeh5mqcw49/croot/ruamel.yaml_1666304555976/work +ruamel.yaml.clib @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_f64xdg2rww/croot/ruamel.yaml.clib_1666302244208/work +safetensors @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_40p0nh5amn/croot/safetensors_1741361303980/work +scikit-learn @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_aew5bgsgui/croot/scikit-learn_1753427393651/work +scipy @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_cc_7w74fto/croot/scipy_1743153255057/work/dist/scipy-1.15.2-cp310-cp310-macosx_11_0_arm64.whl#sha256=ee07a0e6b02a203db8a258a51b5822186b365c46a776093594d3a9a290ecd92d +seaborn @ file:///home/conda/feedstock_root/build_artifacts/seaborn-split_1733730015268/work +semver @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_99ujwp04tw/croot/semver_1709243633470/work +Send2Trash @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_5b31f0zzlv/croot/send2trash_1699371144121/work +sentence-transformers @ file:///home/conda/feedstock_root/build_artifacts/sentence-transformers_1758642484745/work +shapely @ 
file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_869puci72j/croot/shapely_1722533169124/work +shellingham==1.5.4 +sip @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_ba9odc_3u1/croot/sip_1698675938651/work +six==1.17.0 +smart_open==7.3.0.post1 +smmap==5.0.2 +sniffio @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_1573pknjrg/croot/sniffio_1705431298885/work +soupsieve @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_9798xzs_03/croot/soupsieve_1696347567192/work +spacy==3.8.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +srsly==2.5.1 +stack-data @ file:///opt/conda/conda-bld/stack_data_1646927590127/work +streamlit==1.47.1 +sympy @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_9b15dnv4zk/croot/sympy_1756713487054/work +tenacity==9.1.2 +terminado @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_fcfvyc0an2/croot/terminado_1671751835701/work +thinc==8.3.6 +threadpoolctl @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_efa5bvb6vi/croot/threadpoolctl_1719407806403/work +tinycss2 @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_fcw5_i306t/croot/tinycss2_1668168825117/work +tokenizers @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_edx0_h5d0r/croot/tokenizers_1741370364286/work +toml==0.10.2 +tomli @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_d0e5ffbf-5cf1-45be-8693-c5dff8108a2awhthtjlq/croots/recipe/tomli_1657175508477/work +toolz @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_362wyqvvgy/croot/toolz_1667464079070/work +torch @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_951ooclnw1/croot/libtorch_1746637517770/work +tornado==6.5.1 +tqdm==4.67.1 +traitlets @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_500m2_1wyk/croot/traitlets_1718227071952/work +transformers @ 
file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_d4vmqk9i5p/croot/transformers_1741723022024/work +truststore @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_42mm7e6j06/croot/truststore_1695244298716/work +typer==0.16.0 +typing-inspection==0.4.1 +typing_extensions==4.14.1 +tzdata==2025.2 +ujson @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_dexxju769y/croot/ujson_1717597527341/work uritemplate==4.2.0 urllib3==2.5.0 +wasabi==1.1.3 +wcwidth @ file:///Users/ktietz/demo/mc3/conda-bld/wcwidth_1629357192024/work +weasel==0.4.1 +webencodings==0.5.1 +websocket-client @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_d37u7gqts8/croot/websocket-client_1715878310260/work +wrapt==1.17.2 +xxhash @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_4dqzb3ngyc/croot/python-xxhash_1737039922516/work +xyzservices @ file:///opt/miniconda3/conda-bld/xyzservices_1758720200972/work +yarl @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_01an_nyy_6/croot/yarl_1732546853427/work +zipp @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_31jm3q76eq/croot/zipp_1704206913245/work +zstandard @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_054juzz3it/croot/zstandard_1714677666952/work From 2f5ca79819b23ed23ddbb8dce23a67208e653be2 Mon Sep 17 00:00:00 2001 From: Silke Nodwell Date: Sun, 25 Jan 2026 17:16:48 +0000 Subject: [PATCH 3/5] Add file for retrieving the blog information from csv --- tools/blog_automation/blog_info_snapshot.csv | 31 ++++++++++++ .../blog_information_from_spreadsheet.py | 46 ++++++++++++++++++ ...tml_conversion.py => save_blog_as_html.py} | 48 ++++++++++--------- 3 files changed, 102 insertions(+), 23 deletions(-) create mode 100644 tools/blog_automation/blog_info_snapshot.csv create mode 100644 tools/blog_automation/blog_information_from_spreadsheet.py rename tools/blog_automation/{doc_to_html_conversion.py => 
save_blog_as_html.py} (94%) diff --git a/tools/blog_automation/blog_info_snapshot.csv b/tools/blog_automation/blog_info_snapshot.csv new file mode 100644 index 00000000..5d4bb094 --- /dev/null +++ b/tools/blog_automation/blog_info_snapshot.csv @@ -0,0 +1,31 @@ +,url,doc_id,author_name,author_role,description,source +0,https://drive.google.com/open?id=13W0nyEjCzwxeSa3x_TdOXO_kr2DhXgNBO8SwUox3TYk,,Sahana Venkatesh,Wayve,"Generative AI, a transformative technology, has seen rapid advancements and widespread adoption in recent years. Major tech companies and startups are in a competitive race to develop superior models, while businesses and governments explore its applications and implications + +My journey with GenAI began with a Hackathon for Social Good, creating Anxi-Aid, a tool utilizing AI for personalized anxiety management plans. This blogpost highlighted the potential of Generative AI in enhancing lives. To harness this technology, I walk through how one needs to understand use case, master prompt engineering, and leverage existing tools like OpenAI’s API.",https://www.peoplematters.in/site/interstitial?return_to=%2Farticle%2Fbusiness%2Fai-adoption-spikes-as-companies-start-seeing-real-value-from-gen-ai-41550 +1,https://drive.google.com/open?id=1iMytEs9YfkLhjLmBKX6QZdKN4bcCOMw9,,Adriana Zencke Zimmermann,Tech Lead,Essential Tips for Software Engineer Interview Success,https://unsplash.com/ +2,https://drive.google.com/open?id=1kcw5AFt72RY3y09yWHRhIGM6lzwtSCzZNAyA20pFjVk,,Eleonora Belova,Test Automation Engineer,"Going through a job search and preparing for interviews can often feel like navigating a labyrinth of challenges and uncertainties. To shed light on this crucial phase, we reached out to various community members to gather their insights, experiences, and strategies for success. 
In this article you will find their personal stories, highlighting how they handle rejection, tackle tough technical interviews, and employ unique methods to stand out to potential employers. Additionally, we explore the resources and tools they find valuable in their preparation journey.",https://unsplash.com/photos/woman-in-teal-t-shirt-sitting-beside-woman-in-suit-jacket-JaoVGh5aJ3E +3,https://drive.google.com/open?id=195q76fSmTYhnEwukbLUFtlx9tIjA4Qs5,,Madhura Chaganty,Engineering Manager at Paythru,Have you ever considered the environmental impact of the software code we write?,Free image from Canva +4,https://drive.google.com/open?id=1LKIOrIUz_ff8gZpfEaUeks7yhk35WsDy,,Arzu Caner,Full Stack Developer at InnoAl Tech solution,I wrote about my Al DevCamp journey,I created with Canva +5,https://drive.google.com/open?id=1B6pisvehhDNhCWhaINdMqqXVjMLp-Mx5,,Busra Ecem Sakar,Senior Data Analyst,How does Natural Language Processing (NLP) interact between people and technology?,"I used ChatGPT image generator to create pictures, and I added a graph from the research that I mentioned in my blog." +6,https://drive.google.com/open?id=1SF5gRqBhTzzsXrcSdT8HgVBcUV_-a7d7XR-4cUoFtdI,,Airat Yusuff,Software Engineer / BJSS,"Learn how to use PartyRock by AWS to build your own AI app in minutes, NO coding involved!",I created in Canva +7,https://drive.google.com/open?id=1hAxMDwpgJv3lsBipWcfG3aiVmIVoQ7mG,,Busra Ecem Sakar,Senior Data Analyst,It is about NLP.,This image were created by AI. 
+8,https://drive.google.com/open?id=1gH4kd2-KeH8fbp2rAefJm0ewZVGlZ79N,,Irina Kamalova,VP Lead Software Engineer,Microservices Paradise,I created them +9,https://drive.google.com/open?id=1MKNGzDgMBRurakr1bBeldhl78Vy0dXSY,,Irina Kamalova,VP Lead Software Engineer @ JPMorgan Chase,Tailoring your career quarterly,I created them +10,https://drive.google.com/open?id=19Y3brnnkXSBv1ba-KeAcHmNx9h2J86MjwoCTg1ip_6c,,Sahana Venkatesh,"Software Engineer, Wayve",Sharing learnings from one of most interesting tech conferences in the world.,I asked my sister and got her consent before sharing. +11,https://drive.google.com/open?id=1bUclrl46Dvd4gvZEVzCsmddcKA-YvG9K,,Madhura Chaganty,Engineering Manager @Elliptic,"Kubernetes (or K8s) has no shortage of resources and guides, yet diving into its complexities can still feel overwhelming, especially for beginners. As I embark on my own journey of learning Kubernetes, I wanted to tackle one of its core processes—how a Pod, the fundamental unit of deployment in K8s, progresses from a simple YAML definition to a fully Running state in the cluster. This blog is my first attempt at understanding and simplifying this crucial process.",I created the infographics on my own. +12,https://drive.google.com/open?id=16-BUIHjT0wtwhxfQeTF8x45Axq02cEkF-WM4GFf240M,,Julia Babahina,Lead Risk Management Specialist at Swift ,Risk management in engineering and beyond ,"Leal, R. (2024) What is ISO3100?, Advisera [Online] https://advisera.com/articles/what-is-iso-31000/ (Accessed June 2024) " +13,https://drive.google.com/open?id=1iNIR60YIrNkLUq5yzKgFye2xUW3DvDxlaRdHMHhij2I,,Sahana Venkatesh,Software Engineer,"TechCrunch Disrupt 2024 brought together global tech leaders to explore cutting-edge innovations. 
I interviewed Madhuvanthi Venkatesh, a data scientist and AI meetup organizer, about her key takeaways: the rise of “tiny LLMs,” the importance of real networking, and Silicon Valley’s leadership in AI.",Madhu shared her photo +14,https://drive.google.com/open?id=1eivs1x6Drqz6sYjOpL1qRpNicXnC6AS4,,Julia Babahina,Lead Risk Management Specialist ,How to build a career development plan,Created in canvas and power point +15,https://drive.google.com/open?id=1bYFPwxSrdCAKV06vROjRxrMe1xSMnqxyE2-9UygO_pY,,Peace Onyehanere,Software Engineer,Preparing for the AWS Certification Exam,Google search +16,https://drive.google.com/open?id=13-rdjlPEaMP2x8-SKCFP9wwvoAU6UdA3,,Angela Busato,Frontend Software Developer,"I used the question suggested in the meetup ""What inspired me to start coding, and how has it changed my life?”",From Codemotion Conference +17,https://drive.google.com/open?id=1sfB-6ziaDEiStmM95pSYlcGhDsDXTJ8ZCaud7Uhxi5M,,Sahana Venkatesh,WCC newsletter,"This article highlights the journey of the winning team in the Gemma2 AI Challenge, a hackathon hosted by Lablab AI and the Gemma team at Google. It explores the team's ideation process, the technical aspects of their innovative Chrome extension, and the power skills—such as teamwork, perseverance, and effective communication—that played a key role in their success. The article provides insights into the challenges faced, solutions developed, and valuable lessons learned, offering inspiration for aspiring developers and AI enthusiasts.",Ying gave it to me. +18,https://drive.google.com/open?id=12EU2xARTWzdYMwXCA-vSR58Mh5uwbuuJ,,Irina Kamalova,VP Lead Software Engineer @ JPMorgan Chase,"Mid to Senior Engineer: Two strategies +",Excalidraw +19,https://drive.google.com/open?id=1Zh2AYmfoYwTt491rqABm9gX-HnE84UbU,,Nino Godoradze,Data Scientist as Bank of Georgia,"Atomic Habits: Small Changes, Real Results",https://www.pexels.com/search/ +20,https://drive.google.com/open?id=13a94d83AEDYD54weCc7RJp0Arfr8WysC,,Büşra Ç. T. 
(This is how I'd like my name to look like on the post),-,"A personal reflection on learning to code as a self-taught developer, battling imposter syndrome, and how mentorship helped me stop looking only at what I lacked and start seeing how far I’d come.",Chat GPT +21,https://drive.google.com/open?id=1SaT3sHo0P1dG1lFus8HrLnd0W5TKBJ-_JkSvUEF1oxY,,Rajani Rao,"Principal Technologist, AVEVA","When They Said It Was Over, We Said It Was Just Beginning: Our First Year as WomenCodingCommunity",From my phone +22,https://drive.google.com/open?id=146OJMkVYqteZIT_dzsprMqgyAPE91DBl,,Silke Nodwell,Analyst at Altus Group & Lead at Women Coding Community,"After running a four-week Atomic Habits challenge with the Women Coding Community, I put a few strategies to the test — from early wake-ups to study sessions in coffee shops. In this post, I reflect on what worked, what surprised me, and how one small change to my morning routine helped me finally move past procrastination.","Generated with Nebius AI studio, model black-forest-labs/flux-dev" +23,https://docs.google.com/document/d/1V6balLTykSgUqVfIjOVL6uXxB80lIc4hCiRqqSaWGQE/edit?tab=t.0#heading=h.w2lwu8m6kvda,1V6balLTykSgUqVfIjOVL6uXxB80lIc4hCiRqqSaWGQE,Eleonora Belova,Lead at Women Coding Community,"What started as a side project with just a handful of Google Forms has since grown into a vibrant mentorship community, which helped people grow, land new roles, and build meaningful connections along the way.", +24,https://nishiajmera.com/never-eat-alone-book-summary-80c571603306,,Nishi Ajmera,Manager Technology ,Summary of the book never eat alone, +25,https://docs.google.com/document/d/1PMMpTQZZ9r4dUAY9WZAapEuJ6_rfSY9Fv3FSp90gNPw/edit?usp=sharing,1PMMpTQZZ9r4dUAY9WZAapEuJ6_rfSY9Fv3FSp90gNPw,Udeme Jalekun,Senior QA Engineer / Raenest ,"I can write on software and specialised testing like microservice testing, security testing, and accessibility. 
I also write on roadmap and career development planning, mentorship, and leadership.",Gemini +26,https://docs.google.com/document/d/1ttn2qP6OuA9wPmCx-zkHqtklykMDWNYTFqGyNZzisgM/edit?usp=sharing,1ttn2qP6OuA9wPmCx-zkHqtklykMDWNYTFqGyNZzisgM,Silke Nodwell,Lead at Women Coding Community,"A behind-the-scenes look at how four women from Women Coding Community, spanning three countries and time zones, built a donor-focused platform for the GNEC Hackathon and placed 3rd out of more than 150 teams. From forming the team on Slack to designing an impact tracker, foodbank map and natural-language charity recommender, this post reflects on collaboration, problem solving and what it takes to succeed in a global hackathon.", diff --git a/tools/blog_automation/blog_information_from_spreadsheet.py b/tools/blog_automation/blog_information_from_spreadsheet.py new file mode 100644 index 00000000..aca48cf1 --- /dev/null +++ b/tools/blog_automation/blog_information_from_spreadsheet.py @@ -0,0 +1,46 @@ +import re +import pandas as pd + +SPREADSHEET_ID = '1Pje2qOn23OgtAyhjqKwQFYcaEAE3gAy5f3T_5LCgA2o' + +def _extract_doc_id_from_url(url): + """Extract the document ID from a Google Docs URL.""" + match = re.search(r'/document/d/([a-zA-Z0-9-_]+)', url) + if match: + return match.group(1) + else: + return None + +def _extract_and_rename_relevant_fields(df): + formatted_df = pd.DataFrame({}) + formatted_df['url'] = df['Upload your writing draft for review'] + formatted_df['doc_id'] = formatted_df['url'].apply(_extract_doc_id_from_url) + formatted_df['author_name'] = df['What is your full name? '] + formatted_df['author_role'] = df['What is your position / company you are working at / associated with? '] + formatted_df['description'] = df['Please provide a short description of your writing idea / blog post? '] + formatted_df['source'] = df[ + 'Please provide a source of how you obtained/created the infographic/photo/picture used.' 
+ ] + return formatted_df + +def dataframe_of_blog_spreadsheet_info(spreadsheet_id=SPREADSHEET_ID): + import gspread + import pandas as pd + + gc = gspread.service_account(filename="service_account_key.json") + sh = gc.open_by_key(spreadsheet_id) + worksheet = sh.worksheet("Form Responses 1") + data = worksheet.get_all_records() + df = pd.DataFrame(data) + return df + +if __name__=='__main__': + df = dataframe_of_blog_spreadsheet_info() + formatted_df = df.pipe(_extract_and_rename_relevant_fields) + formatted_df.to_csv('blog_info_snapshot.csv') + + + + + + diff --git a/tools/blog_automation/doc_to_html_conversion.py b/tools/blog_automation/save_blog_as_html.py similarity index 94% rename from tools/blog_automation/doc_to_html_conversion.py rename to tools/blog_automation/save_blog_as_html.py index 7f8c6cc7..f391dfb8 100644 --- a/tools/blog_automation/doc_to_html_conversion.py +++ b/tools/blog_automation/save_blog_as_html.py @@ -26,6 +26,8 @@ --- ''' +# TODO: Use information from spreadsheet with optional doc_ID param + def _current_directory(): return Path(__file__).resolve().parent @@ -43,26 +45,9 @@ def drive_connection(): drive = build('drive', 'v3', credentials=creds) return drive -drive = drive_connection() - -def _posts_directory(): - # Path to the directory where the script itself is located - script_dir = _current_directory() - - # Construct the path relative to the script’s location - posts_dir = (script_dir / "../../_posts").resolve() - - return posts_dir - -def _today_date_str(): - return dt.date.today().isoformat() - -def _create_blog_filename_with_date(doc_name, date_str): - formatted_blog_title = doc_name.lower().replace(' ', '-').strip() - filename = f"{date_str}-{formatted_blog_title}" - return filename +DRIVE = drive_connection() -def _get_info_from_spreadsheet(drive=drive, spreadsheet_id=SPREADSHEET_ID): +def get_blog_info_from_spreadsheet(spreadsheet_id=SPREADSHEET_ID): import gspread import pandas as pd @@ -83,8 +68,25 @@ def 
_get_info_from_spreadsheet(drive=drive, spreadsheet_id=SPREADSHEET_ID): df = pd.DataFrame(data) return df +def _posts_directory(): + # Path to the directory where the script itself is located + script_dir = _current_directory() + + # Construct the path relative to the script’s location + posts_dir = (script_dir / "../../_posts").resolve() + + return posts_dir + +def _today_date_str(): + return dt.date.today().isoformat() + +def _create_blog_filename_with_date(doc_name, date_str): + formatted_blog_title = doc_name.lower().replace(' ', '-').strip() + filename = f"{date_str}-{formatted_blog_title}" + return filename + def _update_yaml_header_with_spreadsheet_info(yaml_header): - spreadsheet_info = _get_info_from_spreadsheet(drive=drive).iloc[-1].to_dict() + spreadsheet_info = get_blog_info_from_spreadsheet(drive=DRIVE).iloc[-1].to_dict() try: author_name = spreadsheet_info['What is your full name? '] author_role = spreadsheet_info[ @@ -102,7 +104,7 @@ def _update_yaml_header_with_spreadsheet_info(yaml_header): except KeyError as error: print(f'Unable to find relevant spreadsheet field. 
Please check the spreadsheet carefully.\n{error}') -def export_blog_as_html(document_id, spreadsheet_info, date=None, drive=drive): +def export_blog_as_html(document_id, date=None, drive=DRIVE): if date is None: date = _today_date_str() @@ -155,7 +157,7 @@ def download_blog_image(spreadsheet_info): file_id = re.search(r'drive\.google\.com/file/d/([^/]+)/', blog_image_drive_link).group(1) # Download the image file try: - request = drive.files().get_media(fileId=file_id) + request = DRIVE.files().get_media(fileId=file_id) image_data = request.execute() # Save the image locally image_filename = f"blog_image_{file_id}.jpg" @@ -176,7 +178,7 @@ def copy_image_to_blog_assets(image_filename, blog_filename): return f"/assets/images/blog/{new_image_filename}" def export_blog_with_image(document_id): - spreadsheet_info = _get_info_from_spreadsheet(drive=drive) + spreadsheet_info = get_blog_info_from_spreadsheet(drive=DRIVE) blog_filename = export_blog_as_html(document_id, spreadsheet_info) image_filename = download_blog_image(spreadsheet_info) copy_image_to_blog_assets(image_filename, blog_filename) From b0917adb761ff375d97e3a6217a240c86dfd850e Mon Sep 17 00:00:00 2001 From: Silke Nodwell Date: Tue, 27 Jan 2026 23:21:52 +0000 Subject: [PATCH 4/5] Add GitHub Actions workflow and update main blog_exporter script to update info from csv, including downloading the blog image --- .github/workflows/run_blog_exporter.yml | 92 +++++++ tools/blog_automation/README.md | 13 +- tools/blog_automation/blog_exporter.py | 256 ++++++++++++++++++ ...sheet.py => blog_info_from_spreadsheet.py} | 6 +- tools/blog_automation/blog_info_snapshot.csv | 56 ++-- tools/blog_automation/check_for_new_blogs.py | 73 +++++ tools/blog_automation/requirements.txt | 4 +- tools/blog_automation/save_blog_as_html.py | 192 ------------- tools/blog_automation/test_blog_exporter.py | 26 ++ 9 files changed, 495 insertions(+), 223 deletions(-) create mode 100644 .github/workflows/run_blog_exporter.yml create mode 100644 
tools/blog_automation/blog_exporter.py rename tools/blog_automation/{blog_information_from_spreadsheet.py => blog_info_from_spreadsheet.py} (92%) create mode 100644 tools/blog_automation/check_for_new_blogs.py delete mode 100644 tools/blog_automation/save_blog_as_html.py create mode 100644 tools/blog_automation/test_blog_exporter.py diff --git a/.github/workflows/run_blog_exporter.yml b/.github/workflows/run_blog_exporter.yml new file mode 100644 index 00000000..152cd09d --- /dev/null +++ b/.github/workflows/run_blog_exporter.yml @@ -0,0 +1,92 @@ +name: Import Meetup Events + +on: + workflow_dispatch: + schedule: + - cron: '0 7 * * *' # check for new blogs at 7:00am +jobs: + check-for-changes: + if: github.repository == 'Women-Coding-Community/WomenCodingCommunity.github.io' + runs-on: ubuntu-latest + outputs: + has_new_rows: ${{ steps.check-blog.outputs.has_new_rows }} + new_row_indices: ${{ steps.check-blog.outputs.new_row_indices }} + + steps: + - name: Checkout repository + uses: actions/checkout@v5 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Cache pip + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('tools/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r tools/requirements.txt + + - name: Check for new blog entries + id: check-blog + run: | + cd tools/blog_automation + python check_for_new_blogs.py + + # If there are new rows, run the blog_exporter script + run-blog-automation: + needs: check-for-changes + if: needs.check-for-changes.outputs.has_new_rows == 'true' + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v5 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Cache pip + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os 
}}-pip-${{ hashFiles('tools/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r tools/requirements.txt + + - name: Export new blogs + run: | + cd tools/blog_automation + for row_index in ${{ needs.check-for-changes.outputs.new_row_indices }}; do + python blog_exporter.py --row_index "$row_index" + done + + - name: Create or Update Pull Request + id: create-pr + uses: peter-evans/create-pull-request@v7 + with: + token: ${{ secrets.GHA_ACTIONS_ALLOW_TOKEN }} + commit-message: "Automated blog import from Google Docs" + branch: "automation/import-blog" + team-reviewers: "Women-Coding-Community/leaders" + title: "Automated import of blog posts" + body: | + This PR was created automatically by a GitHub Action to import new blog posts. + The new blog posts have been added to `_posts/` directory. + The `blog_info_snapshot.csv` has been updated to track processed entries. + labels: | + automation \ No newline at end of file diff --git a/tools/blog_automation/README.md b/tools/blog_automation/README.md index 426862a9..1d2d9cc5 100644 --- a/tools/blog_automation/README.md +++ b/tools/blog_automation/README.md @@ -77,6 +77,17 @@ Use this ID in your scripts when exporting the document. ## Run Automation 1. Activate virtual environment: `source venv/bin/activate` -2. Run the script: `python doc_to_html_conversion.py ` +2. Run the script: `python blog_exporter.py [--row_index ROW_INDEX]`, where ROW_INDEX is the index of the row in the CSV to export. It defaults to -1, i.e. the last row in the CSV. + +**Notes and Options** +- The blog csv defaults to blog_info_snapshot.csv + +## Tests + +Run `pytest test_blog_exporter.py` + +## GitHub Actions automation +There is a GitHub Action .github/workflows/run_blog_exporter.yml which checks for any new rows in the blog_info_snapshot.csv, and runs the blog_exporter.py script for each new row. 
+ diff --git a/tools/blog_automation/blog_exporter.py b/tools/blog_automation/blog_exporter.py new file mode 100644 index 00000000..f8d5d58f --- /dev/null +++ b/tools/blog_automation/blog_exporter.py @@ -0,0 +1,256 @@ +import argparse +import os +import re +import shutil +import datetime as dt +from pathlib import Path +import markdown +import pandas as pd +from google.oauth2 import service_account +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError + +# --- Configuration --- +SERVICE_ACCOUNT_FILE = 'service_account_key.json' +YAML_HEADER = '''--- +layout: post +title: {title} +date: {date} +author_name: {author_name} +author_role: {author_role} +image: {image_path} +image_source: {image_source} +description: {description} +category: blog +--- +''' + +def _current_directory(): + return os.path.dirname(os.path.abspath(__file__)) + +def drive_connection(): + service_account_path = os.path.join(_current_directory(), SERVICE_ACCOUNT_FILE) + if not os.path.exists(service_account_path): + print(f"ERROR: Service account key file '{service_account_path}' not found.\n" + "Please obtain your own Google service account key and place it at this path.\n" + "(Never commit this file to version control.)") + exit(1) + creds = service_account.Credentials.from_service_account_file( + service_account_path, + scopes=['https://www.googleapis.com/auth/drive.readonly'] + ) + drive = build('drive', 'v3', credentials=creds) + return drive + +def _posts_directory(): + script_dir = Path(_current_directory()) + posts_dir = (script_dir / "../../_posts").resolve() + return posts_dir + +def _today_date_str(): + return dt.date.today().isoformat() + +def _create_blog_filename_with_date(doc_name, date_str): + formatted_blog_title = doc_name.lower().replace(' ', '-').strip() + filename = f"{date_str}-{formatted_blog_title}" + return filename + +def _get_doc_name_from_drive(doc_id, drive): + """Fetch document name from Google Drive.""" + try: + file = 
drive.files().get(fileId=doc_id, fields='name').execute() + return file['name'] + except HttpError as error: + print(f"ERROR: Could not fetch document from Drive (ID: {doc_id})\n{error}") + return None + +def _get_doc_content_as_markdown(doc_id, drive): + """Export Google Doc as markdown.""" + try: + request = drive.files().export_media(fileId=doc_id, mimeType='text/markdown') + file_content = request.execute() + return file_content.decode('utf-8') + except HttpError as error: + print(f"ERROR: Could not export document from Drive (ID: {doc_id})\n{error}") + return None + +def _markdown_to_html(markdown_text): + """Convert Markdown to HTML with custom formatting.""" + html = markdown.markdown(markdown_text) + + # Remove tags from inside heading tags + html = re.sub(r'(.+?)', r'\2', html) + + # Remove the first heading if present + html = re.sub(r'^.*?\s*', '', html, flags=re.DOTALL) + + # Wrap the body in
+ html_body = f'
\n{html}\n
' + + return html_body + +def _download_blog_image(blog_image_drive_link, drive): + """Download image from Google Drive link.""" + + pattern = re.compile(r"(?:id=|/d/)([^/&?]+)") + + try: + file_id = re.search( + pattern, + blog_image_drive_link + ) + if not file_id: + raise Exception(f"WARNING: Could not extract file ID from image link: {blog_image_drive_link}") + + file_id = file_id.group(1) + print(f'{file_id=}') + file_metadata = drive.files().get(fileId=file_id, fields='name, mimeType').execute() + file_name = file_metadata['name'] + + request = drive.files().get_media(fileId=file_id) + file_content = request.execute() + + # Save temporarily + temp_path = os.path.join(_current_directory(), file_name) + with open(temp_path, 'wb') as f: + f.write(file_content) + + return temp_path + except HttpError as error: + print(f"WARNING: Could not download image from Drive\n{error}") + return None + +def _copy_image_to_blog_assets(image_path, blog_filename): + """Copy image to assets directory and return relative path.""" + if not image_path or not os.path.exists(image_path): + return None + + assets_dir = Path(_current_directory()).resolve().parent.parent / 'assets' / 'images' / 'blog' + assets_dir.mkdir(parents=True, exist_ok=True) + + new_image_filename = blog_filename.split('.')[0] + '.' 
+image_path.split('.')[-1] + new_image_path = assets_dir / new_image_filename + + shutil.copy(image_path, new_image_path) + + return f"/assets/images/blog/{new_image_filename}" + +# def _get_image_path_from_blog_filename_and_image_extension(blog_filename, image_extension): +# assets_dir = Path(_current_directory()).resolve().parent.parent / 'assets' / 'images' / 'blog' +# image_filename = assets_dir / (blog_filename.split('.')[0] + image_extension) +# return image_filename + +def download_image_and_copy_to_repo(image_link, blog_filename, drive): + downloaded_image_path = _download_blog_image(image_link, drive) + # if downloaded_image_path is not None: + # image_path_relative = _get_image_path_from_blog_filename_and_image_extension( + # blog_filename, image_extension=downloaded_image_path.split('.')[-1] + # ) + + image_path_relative = _copy_image_to_blog_assets( + downloaded_image_path, + blog_filename + ) + + os.remove(downloaded_image_path) # Clean up temp file + + return image_path_relative + + +def export_blog_from_csv_row(row_index, csv_path=None, doc_id_override=None, date=None): + """ + Export a blog from a CSV row. 
+ + Args: + row_index: Index of the row in the CSV + csv_path: Path to CSV file (defaults to blog_info_snapshot.csv in current dir) + doc_id_override: Optional Google Doc ID to override the one in CSV + date: Blog post date (defaults to today) + + Returns: + blog_filename if successful, None otherwise + """ + if csv_path is None: + csv_path = os.path.join(_current_directory(), 'blog_info_snapshot.csv') + + if date is None: + date = _today_date_str() + + # Read CSV and get row + try: + df = pd.read_csv(csv_path, index_col=0) + blog_info_ser = df.iloc[row_index] + except (FileNotFoundError, IndexError) as e: + print(f"ERROR: Could not read CSV row {row_index}\n{e}") + return None + + # Determine doc_id + doc_id = doc_id_override or blog_info_ser.get('doc_id') + + if pd.isna(doc_id) or not doc_id: + print(f"SKIP: Row {row_index} has no doc_id (external blog link)") + raise ValueError("No doc_id found in spreadsheet row. Please specify a doc_id_override.") + + # Connect to Google Drive + drive = drive_connection() + + # 1. Get document name and content + doc_name = _get_doc_name_from_drive(doc_id, drive) + doc_content = _get_doc_content_as_markdown(doc_id, drive) + blog_filename = _create_blog_filename_with_date(doc_name, date) + + # 2. Convert to HTML + html_body = _markdown_to_html(doc_content) + + # 3. Build YAML header + author_name = blog_info_ser.get('author_name', 'Unknown') + author_role = blog_info_ser.get('author_role', '') + description = blog_info_ser.get('description', '') + source = blog_info_ser.get('source', '') + + + yaml_header = YAML_HEADER.format( + title=doc_name.title(), + date=date, + author_name=author_name, + author_role=author_role, + image_path='[IMAGE_PATH]', # Placeholder, will update after image download + image_source=source, + description=description + ) + + # 4. 
Download image if available + image_link = blog_info_ser.get('image_link') + if image_link: + image_path_relative = download_image_and_copy_to_repo( + image_link, blog_filename=blog_filename, drive=drive + ) + if image_path_relative: + yaml_header = yaml_header.replace('[IMAGE_PATH]', image_path_relative) + + # 5. Combine and save + final_html = yaml_header + '\n' + html_body + + posts_dir = _posts_directory() + posts_dir.mkdir(parents=True, exist_ok=True) + + filename = posts_dir / f"{blog_filename}.html" + with open(filename, 'w', encoding='utf-8') as f: + f.write(final_html) + + print(f"✓ Exported blog to: {filename}") + return blog_filename + + +if __name__ == "__main__": + # example usage: python blog_exporter.py # this will export the blog from the last row of the CSV + parser = argparse.ArgumentParser(description="Export a blog from CSV row into HTML.") + parser.add_argument( + "--row_index", type=int, default=-1, help="Index of the row in blog_info_snapshot.csv" + ) + parser.add_argument("--csv_path", help="Path to CSV file (default: blog_info_snapshot.csv)") + parser.add_argument("--doc_id", help="Override doc_id from CSV") + parser.add_argument("--date", help="Date for blog post (YYYY-MM-DD). Defaults to today.") + + args = parser.parse_args() + export_blog_from_csv_row(args.row_index, args.csv_path, args.doc_id, args.date) diff --git a/tools/blog_automation/blog_information_from_spreadsheet.py b/tools/blog_automation/blog_info_from_spreadsheet.py similarity index 92% rename from tools/blog_automation/blog_information_from_spreadsheet.py rename to tools/blog_automation/blog_info_from_spreadsheet.py index aca48cf1..f555306e 100644 --- a/tools/blog_automation/blog_information_from_spreadsheet.py +++ b/tools/blog_automation/blog_info_from_spreadsheet.py @@ -21,6 +21,7 @@ def _extract_and_rename_relevant_fields(df): formatted_df['source'] = df[ 'Please provide a source of how you obtained/created the infographic/photo/picture used.' 
] + formatted_df['image_link'] = df['Submit your blog cover image'] return formatted_df def dataframe_of_blog_spreadsheet_info(spreadsheet_id=SPREADSHEET_ID): @@ -34,11 +35,14 @@ def dataframe_of_blog_spreadsheet_info(spreadsheet_id=SPREADSHEET_ID): df = pd.DataFrame(data) return df -if __name__=='__main__': +def save_blog_info_to_csv(): df = dataframe_of_blog_spreadsheet_info() formatted_df = df.pipe(_extract_and_rename_relevant_fields) formatted_df.to_csv('blog_info_snapshot.csv') +if __name__=='__main__': + save_blog_info_to_csv() + diff --git a/tools/blog_automation/blog_info_snapshot.csv b/tools/blog_automation/blog_info_snapshot.csv index 5d4bb094..b6b0d518 100644 --- a/tools/blog_automation/blog_info_snapshot.csv +++ b/tools/blog_automation/blog_info_snapshot.csv @@ -1,31 +1,31 @@ -,url,doc_id,author_name,author_role,description,source +,url,doc_id,author_name,author_role,description,source,image_link 0,https://drive.google.com/open?id=13W0nyEjCzwxeSa3x_TdOXO_kr2DhXgNBO8SwUox3TYk,,Sahana Venkatesh,Wayve,"Generative AI, a transformative technology, has seen rapid advancements and widespread adoption in recent years. Major tech companies and startups are in a competitive race to develop superior models, while businesses and governments explore its applications and implications -My journey with GenAI began with a Hackathon for Social Good, creating Anxi-Aid, a tool utilizing AI for personalized anxiety management plans. This blogpost highlighted the potential of Generative AI in enhancing lives. 
To harness this technology, I walk through how one needs to understand use case, master prompt engineering, and leverage existing tools like OpenAI’s API.",https://www.peoplematters.in/site/interstitial?return_to=%2Farticle%2Fbusiness%2Fai-adoption-spikes-as-companies-start-seeing-real-value-from-gen-ai-41550 -1,https://drive.google.com/open?id=1iMytEs9YfkLhjLmBKX6QZdKN4bcCOMw9,,Adriana Zencke Zimmermann,Tech Lead,Essential Tips for Software Engineer Interview Success,https://unsplash.com/ -2,https://drive.google.com/open?id=1kcw5AFt72RY3y09yWHRhIGM6lzwtSCzZNAyA20pFjVk,,Eleonora Belova,Test Automation Engineer,"Going through a job search and preparing for interviews can often feel like navigating a labyrinth of challenges and uncertainties. To shed light on this crucial phase, we reached out to various community members to gather their insights, experiences, and strategies for success. In this article you will find their personal stories, highlighting how they handle rejection, tackle tough technical interviews, and employ unique methods to stand out to potential employers. 
Additionally, we explore the resources and tools they find valuable in their preparation journey.",https://unsplash.com/photos/woman-in-teal-t-shirt-sitting-beside-woman-in-suit-jacket-JaoVGh5aJ3E -3,https://drive.google.com/open?id=195q76fSmTYhnEwukbLUFtlx9tIjA4Qs5,,Madhura Chaganty,Engineering Manager at Paythru,Have you ever considered the environmental impact of the software code we write?,Free image from Canva -4,https://drive.google.com/open?id=1LKIOrIUz_ff8gZpfEaUeks7yhk35WsDy,,Arzu Caner,Full Stack Developer at InnoAl Tech solution,I wrote about my Al DevCamp journey,I created with Canva -5,https://drive.google.com/open?id=1B6pisvehhDNhCWhaINdMqqXVjMLp-Mx5,,Busra Ecem Sakar,Senior Data Analyst,How does Natural Language Processing (NLP) interact between people and technology?,"I used ChatGPT image generator to create pictures, and I added a graph from the research that I mentioned in my blog." -6,https://drive.google.com/open?id=1SF5gRqBhTzzsXrcSdT8HgVBcUV_-a7d7XR-4cUoFtdI,,Airat Yusuff,Software Engineer / BJSS,"Learn how to use PartyRock by AWS to build your own AI app in minutes, NO coding involved!",I created in Canva -7,https://drive.google.com/open?id=1hAxMDwpgJv3lsBipWcfG3aiVmIVoQ7mG,,Busra Ecem Sakar,Senior Data Analyst,It is about NLP.,This image were created by AI. -8,https://drive.google.com/open?id=1gH4kd2-KeH8fbp2rAefJm0ewZVGlZ79N,,Irina Kamalova,VP Lead Software Engineer,Microservices Paradise,I created them -9,https://drive.google.com/open?id=1MKNGzDgMBRurakr1bBeldhl78Vy0dXSY,,Irina Kamalova,VP Lead Software Engineer @ JPMorgan Chase,Tailoring your career quarterly,I created them -10,https://drive.google.com/open?id=19Y3brnnkXSBv1ba-KeAcHmNx9h2J86MjwoCTg1ip_6c,,Sahana Venkatesh,"Software Engineer, Wayve",Sharing learnings from one of most interesting tech conferences in the world.,I asked my sister and got her consent before sharing. 
-11,https://drive.google.com/open?id=1bUclrl46Dvd4gvZEVzCsmddcKA-YvG9K,,Madhura Chaganty,Engineering Manager @Elliptic,"Kubernetes (or K8s) has no shortage of resources and guides, yet diving into its complexities can still feel overwhelming, especially for beginners. As I embark on my own journey of learning Kubernetes, I wanted to tackle one of its core processes—how a Pod, the fundamental unit of deployment in K8s, progresses from a simple YAML definition to a fully Running state in the cluster. This blog is my first attempt at understanding and simplifying this crucial process.",I created the infographics on my own. -12,https://drive.google.com/open?id=16-BUIHjT0wtwhxfQeTF8x45Axq02cEkF-WM4GFf240M,,Julia Babahina,Lead Risk Management Specialist at Swift ,Risk management in engineering and beyond ,"Leal, R. (2024) What is ISO3100?, Advisera [Online] https://advisera.com/articles/what-is-iso-31000/ (Accessed June 2024) " -13,https://drive.google.com/open?id=1iNIR60YIrNkLUq5yzKgFye2xUW3DvDxlaRdHMHhij2I,,Sahana Venkatesh,Software Engineer,"TechCrunch Disrupt 2024 brought together global tech leaders to explore cutting-edge innovations. 
I interviewed Madhuvanthi Venkatesh, a data scientist and AI meetup organizer, about her key takeaways: the rise of “tiny LLMs,” the importance of real networking, and Silicon Valley’s leadership in AI.",Madhu shared her photo -14,https://drive.google.com/open?id=1eivs1x6Drqz6sYjOpL1qRpNicXnC6AS4,,Julia Babahina,Lead Risk Management Specialist ,How to build a career development plan,Created in canvas and power point -15,https://drive.google.com/open?id=1bYFPwxSrdCAKV06vROjRxrMe1xSMnqxyE2-9UygO_pY,,Peace Onyehanere,Software Engineer,Preparing for the AWS Certification Exam,Google search -16,https://drive.google.com/open?id=13-rdjlPEaMP2x8-SKCFP9wwvoAU6UdA3,,Angela Busato,Frontend Software Developer,"I used the question suggested in the meetup ""What inspired me to start coding, and how has it changed my life?”",From Codemotion Conference -17,https://drive.google.com/open?id=1sfB-6ziaDEiStmM95pSYlcGhDsDXTJ8ZCaud7Uhxi5M,,Sahana Venkatesh,WCC newsletter,"This article highlights the journey of the winning team in the Gemma2 AI Challenge, a hackathon hosted by Lablab AI and the Gemma team at Google. It explores the team's ideation process, the technical aspects of their innovative Chrome extension, and the power skills—such as teamwork, perseverance, and effective communication—that played a key role in their success. The article provides insights into the challenges faced, solutions developed, and valuable lessons learned, offering inspiration for aspiring developers and AI enthusiasts.",Ying gave it to me. +My journey with GenAI began with a Hackathon for Social Good, creating Anxi-Aid, a tool utilizing AI for personalized anxiety management plans. This blogpost highlighted the potential of Generative AI in enhancing lives. 
To harness this technology, I walk through how one needs to understand use case, master prompt engineering, and leverage existing tools like OpenAI’s API.",https://www.peoplematters.in/site/interstitial?return_to=%2Farticle%2Fbusiness%2Fai-adoption-spikes-as-companies-start-seeing-real-value-from-gen-ai-41550,https://drive.google.com/open?id=1jhbFNw5H-xYGaFpNEtAownKti54Q8JI4 +1,https://drive.google.com/open?id=1iMytEs9YfkLhjLmBKX6QZdKN4bcCOMw9,,Adriana Zencke Zimmermann,Tech Lead,Essential Tips for Software Engineer Interview Success,https://unsplash.com/ ,https://drive.google.com/open?id=1AWyfp9fOwpamSeSpe425CdDVCFs7e11T +2,https://drive.google.com/open?id=1kcw5AFt72RY3y09yWHRhIGM6lzwtSCzZNAyA20pFjVk,,Eleonora Belova,Test Automation Engineer,"Going through a job search and preparing for interviews can often feel like navigating a labyrinth of challenges and uncertainties. To shed light on this crucial phase, we reached out to various community members to gather their insights, experiences, and strategies for success. In this article you will find their personal stories, highlighting how they handle rejection, tackle tough technical interviews, and employ unique methods to stand out to potential employers. 
Additionally, we explore the resources and tools they find valuable in their preparation journey.",https://unsplash.com/photos/woman-in-teal-t-shirt-sitting-beside-woman-in-suit-jacket-JaoVGh5aJ3E,https://drive.google.com/open?id=1VLV2GDrxgAzqNMJR2FWlJ7HvFaNo619Y +3,https://drive.google.com/open?id=195q76fSmTYhnEwukbLUFtlx9tIjA4Qs5,,Madhura Chaganty,Engineering Manager at Paythru,Have you ever considered the environmental impact of the software code we write?,Free image from Canva,https://drive.google.com/open?id=1jBhLx_d5ZeA0_V9xkiaWTIYqlCLp_TFC +4,https://drive.google.com/open?id=1LKIOrIUz_ff8gZpfEaUeks7yhk35WsDy,,Arzu Caner,Full Stack Developer at InnoAl Tech solution,I wrote about my Al DevCamp journey,I created with Canva,https://drive.google.com/open?id=1U0bsYXt7KCIlAhSN7U60VdKwP_ZIZ1zl +5,https://drive.google.com/open?id=1B6pisvehhDNhCWhaINdMqqXVjMLp-Mx5,,Busra Ecem Sakar,Senior Data Analyst,How does Natural Language Processing (NLP) interact between people and technology?,"I used ChatGPT image generator to create pictures, and I added a graph from the research that I mentioned in my blog.",https://drive.google.com/open?id=1T9C_TvShViSLkoLu-BGEowoALAjeRHK2 +6,https://drive.google.com/open?id=1SF5gRqBhTzzsXrcSdT8HgVBcUV_-a7d7XR-4cUoFtdI,,Airat Yusuff,Software Engineer / BJSS,"Learn how to use PartyRock by AWS to build your own AI app in minutes, NO coding involved!",I created in Canva,https://drive.google.com/open?id=1Xs2N2kYcpNUhv1CR7AGNI-2cMjk9j2Fi +7,https://drive.google.com/open?id=1hAxMDwpgJv3lsBipWcfG3aiVmIVoQ7mG,,Busra Ecem Sakar,Senior Data Analyst,It is about NLP.,This image were created by AI.,https://drive.google.com/open?id=1o0UEbYzXDQsASTbwM6XJRQN2dmOFKE-Q +8,https://drive.google.com/open?id=1gH4kd2-KeH8fbp2rAefJm0ewZVGlZ79N,,Irina Kamalova,VP Lead Software Engineer,Microservices Paradise,I created them,https://drive.google.com/open?id=18KxFLBFWcrG3peRtiEBOOywQnwF27pMI +9,https://drive.google.com/open?id=1MKNGzDgMBRurakr1bBeldhl78Vy0dXSY,,Irina 
Kamalova,VP Lead Software Engineer @ JPMorgan Chase,Tailoring your career quarterly,I created them,https://drive.google.com/open?id=1GCQqhqauzTpnHfgGfNIvoryIar8ByN6G +10,https://drive.google.com/open?id=19Y3brnnkXSBv1ba-KeAcHmNx9h2J86MjwoCTg1ip_6c,,Sahana Venkatesh,"Software Engineer, Wayve",Sharing learnings from one of most interesting tech conferences in the world.,I asked my sister and got her consent before sharing.,https://drive.google.com/open?id=1VzTP1zjHy3X4dHbRyU1_mH37d-ol9f9V +11,https://drive.google.com/open?id=1bUclrl46Dvd4gvZEVzCsmddcKA-YvG9K,,Madhura Chaganty,Engineering Manager @Elliptic,"Kubernetes (or K8s) has no shortage of resources and guides, yet diving into its complexities can still feel overwhelming, especially for beginners. As I embark on my own journey of learning Kubernetes, I wanted to tackle one of its core processes—how a Pod, the fundamental unit of deployment in K8s, progresses from a simple YAML definition to a fully Running state in the cluster. This blog is my first attempt at understanding and simplifying this crucial process.",I created the infographics on my own.,https://drive.google.com/open?id=1M8azDXtSDb2JIU5rStLptsB_9GHvHwLE +12,https://drive.google.com/open?id=16-BUIHjT0wtwhxfQeTF8x45Axq02cEkF-WM4GFf240M,,Julia Babahina,Lead Risk Management Specialist at Swift ,Risk management in engineering and beyond ,"Leal, R. (2024) What is ISO3100?, Advisera [Online] https://advisera.com/articles/what-is-iso-31000/ (Accessed June 2024) ",https://drive.google.com/open?id=10Rd-631uVQDEX-3wSG4u_lkIA3XiCEXF3Sf7JKhfzm8 +13,https://drive.google.com/open?id=1iNIR60YIrNkLUq5yzKgFye2xUW3DvDxlaRdHMHhij2I,,Sahana Venkatesh,Software Engineer,"TechCrunch Disrupt 2024 brought together global tech leaders to explore cutting-edge innovations. 
I interviewed Madhuvanthi Venkatesh, a data scientist and AI meetup organizer, about her key takeaways: the rise of “tiny LLMs,” the importance of real networking, and Silicon Valley’s leadership in AI.",Madhu shared her photo,https://drive.google.com/open?id=1u8CVojKsJWEnwFgS_oo9JyDMuhe4Aocp +14,https://drive.google.com/open?id=1eivs1x6Drqz6sYjOpL1qRpNicXnC6AS4,,Julia Babahina,Lead Risk Management Specialist ,How to build a career development plan,Created in canvas and power point ,https://drive.google.com/open?id=1MK53v0jBvnzHW9ftWDBB67_dkOHEiDyV +15,https://drive.google.com/open?id=1bYFPwxSrdCAKV06vROjRxrMe1xSMnqxyE2-9UygO_pY,,Peace Onyehanere,Software Engineer,Preparing for the AWS Certification Exam,Google search,https://drive.google.com/open?id=1m_J8yjLFZRRVPVcNlNNuld-7FifMvSYM +16,https://drive.google.com/open?id=13-rdjlPEaMP2x8-SKCFP9wwvoAU6UdA3,,Angela Busato,Frontend Software Developer,"I used the question suggested in the meetup ""What inspired me to start coding, and how has it changed my life?”",From Codemotion Conference,https://drive.google.com/open?id=1b2U_X904TWN13HXivBs3HlHgGhrNor2r +17,https://drive.google.com/open?id=1sfB-6ziaDEiStmM95pSYlcGhDsDXTJ8ZCaud7Uhxi5M,,Sahana Venkatesh,WCC newsletter,"This article highlights the journey of the winning team in the Gemma2 AI Challenge, a hackathon hosted by Lablab AI and the Gemma team at Google. It explores the team's ideation process, the technical aspects of their innovative Chrome extension, and the power skills—such as teamwork, perseverance, and effective communication—that played a key role in their success. 
The article provides insights into the challenges faced, solutions developed, and valuable lessons learned, offering inspiration for aspiring developers and AI enthusiasts.",Ying gave it to me.,https://drive.google.com/open?id=1yXKkfTqX60xZG9Zj8HbeVE0pTCb6e-LG 18,https://drive.google.com/open?id=12EU2xARTWzdYMwXCA-vSR58Mh5uwbuuJ,,Irina Kamalova,VP Lead Software Engineer @ JPMorgan Chase,"Mid to Senior Engineer: Two strategies -",Excalidraw -19,https://drive.google.com/open?id=1Zh2AYmfoYwTt491rqABm9gX-HnE84UbU,,Nino Godoradze,Data Scientist as Bank of Georgia,"Atomic Habits: Small Changes, Real Results",https://www.pexels.com/search/ -20,https://drive.google.com/open?id=13a94d83AEDYD54weCc7RJp0Arfr8WysC,,Büşra Ç. T. (This is how I'd like my name to look like on the post),-,"A personal reflection on learning to code as a self-taught developer, battling imposter syndrome, and how mentorship helped me stop looking only at what I lacked and start seeing how far I’d come.",Chat GPT -21,https://drive.google.com/open?id=1SaT3sHo0P1dG1lFus8HrLnd0W5TKBJ-_JkSvUEF1oxY,,Rajani Rao,"Principal Technologist, AVEVA","When They Said It Was Over, We Said It Was Just Beginning: Our First Year as WomenCodingCommunity",From my phone -22,https://drive.google.com/open?id=146OJMkVYqteZIT_dzsprMqgyAPE91DBl,,Silke Nodwell,Analyst at Altus Group & Lead at Women Coding Community,"After running a four-week Atomic Habits challenge with the Women Coding Community, I put a few strategies to the test — from early wake-ups to study sessions in coffee shops. 
In this post, I reflect on what worked, what surprised me, and how one small change to my morning routine helped me finally move past procrastination.","Generated with Nebius AI studio, model black-forest-labs/flux-dev" -23,https://docs.google.com/document/d/1V6balLTykSgUqVfIjOVL6uXxB80lIc4hCiRqqSaWGQE/edit?tab=t.0#heading=h.w2lwu8m6kvda,1V6balLTykSgUqVfIjOVL6uXxB80lIc4hCiRqqSaWGQE,Eleonora Belova,Lead at Women Coding Community,"What started as a side project with just a handful of Google Forms has since grown into a vibrant mentorship community, which helped people grow, land new roles, and build meaningful connections along the way.", -24,https://nishiajmera.com/never-eat-alone-book-summary-80c571603306,,Nishi Ajmera,Manager Technology ,Summary of the book never eat alone, -25,https://docs.google.com/document/d/1PMMpTQZZ9r4dUAY9WZAapEuJ6_rfSY9Fv3FSp90gNPw/edit?usp=sharing,1PMMpTQZZ9r4dUAY9WZAapEuJ6_rfSY9Fv3FSp90gNPw,Udeme Jalekun,Senior QA Engineer / Raenest ,"I can write on software and specialised testing like microservice testing, security testing, and accessibility. I also write on roadmap and career development planning, mentorship, and leadership.",Gemini -26,https://docs.google.com/document/d/1ttn2qP6OuA9wPmCx-zkHqtklykMDWNYTFqGyNZzisgM/edit?usp=sharing,1ttn2qP6OuA9wPmCx-zkHqtklykMDWNYTFqGyNZzisgM,Silke Nodwell,Lead at Women Coding Community,"A behind-the-scenes look at how four women from Women Coding Community, spanning three countries and time zones, built a donor-focused platform for the GNEC Hackathon and placed 3rd out of more than 150 teams. 
From forming the team on Slack to designing an impact tracker, foodbank map and natural-language charity recommender, this post reflects on collaboration, problem solving and what it takes to succeed in a global hackathon.", +",Excalidraw,https://drive.google.com/open?id=1BNXzcINjXCqhBXtJgZmN-DnfSB1hQN9- +19,https://drive.google.com/open?id=1Zh2AYmfoYwTt491rqABm9gX-HnE84UbU,,Nino Godoradze,Data Scientist as Bank of Georgia,"Atomic Habits: Small Changes, Real Results",https://www.pexels.com/search/,https://drive.google.com/open?id=1-9GQvSObuaVtRXA_rCIbr9F5XcneM-k9 +20,https://drive.google.com/open?id=13a94d83AEDYD54weCc7RJp0Arfr8WysC,,Büşra Ç. T. (This is how I'd like my name to look like on the post),-,"A personal reflection on learning to code as a self-taught developer, battling imposter syndrome, and how mentorship helped me stop looking only at what I lacked and start seeing how far I’d come.",Chat GPT,https://drive.google.com/open?id=1V8-BoTDdHb685ttRVZKkXOUUjfhp1ZDz +21,https://drive.google.com/open?id=1SaT3sHo0P1dG1lFus8HrLnd0W5TKBJ-_JkSvUEF1oxY,,Rajani Rao,"Principal Technologist, AVEVA","When They Said It Was Over, We Said It Was Just Beginning: Our First Year as WomenCodingCommunity",From my phone,https://drive.google.com/open?id=1GOjXhgfHclTdkqHhWX-LHuBCvWStcNFm +22,https://drive.google.com/open?id=146OJMkVYqteZIT_dzsprMqgyAPE91DBl,,Silke Nodwell,Analyst at Altus Group & Lead at Women Coding Community,"After running a four-week Atomic Habits challenge with the Women Coding Community, I put a few strategies to the test — from early wake-ups to study sessions in coffee shops. 
In this post, I reflect on what worked, what surprised me, and how one small change to my morning routine helped me finally move past procrastination.","Generated with Nebius AI studio, model black-forest-labs/flux-dev",https://drive.google.com/open?id=1X9bGhSMxvmFVkRFUMWI_7fZzNYYTiaB- +23,https://docs.google.com/document/d/1V6balLTykSgUqVfIjOVL6uXxB80lIc4hCiRqqSaWGQE/edit?tab=t.0#heading=h.w2lwu8m6kvda,1V6balLTykSgUqVfIjOVL6uXxB80lIc4hCiRqqSaWGQE,Eleonora Belova,Lead at Women Coding Community,"What started as a side project with just a handful of Google Forms has since grown into a vibrant mentorship community, which helped people grow, land new roles, and build meaningful connections along the way.",, +24,https://nishiajmera.com/never-eat-alone-book-summary-80c571603306,,Nishi Ajmera,Manager Technology ,Summary of the book never eat alone,, +25,https://docs.google.com/document/d/1PMMpTQZZ9r4dUAY9WZAapEuJ6_rfSY9Fv3FSp90gNPw/edit?usp=sharing,1PMMpTQZZ9r4dUAY9WZAapEuJ6_rfSY9Fv3FSp90gNPw,Udeme Jalekun,Senior QA Engineer / Raenest ,"I can write on software and specialised testing like microservice testing, security testing, and accessibility. I also write on roadmap and career development planning, mentorship, and leadership.",Gemini,https://drive.google.com/open?id=1DF08PAjvFPBv8ZGigjwiaFn1JP8TUHg7 +26,https://docs.google.com/document/d/1ttn2qP6OuA9wPmCx-zkHqtklykMDWNYTFqGyNZzisgM/edit?usp=sharing,1ttn2qP6OuA9wPmCx-zkHqtklykMDWNYTFqGyNZzisgM,Silke Nodwell,Lead at Women Coding Community,"A behind-the-scenes look at how four women from Women Coding Community, spanning three countries and time zones, built a donor-focused platform for the GNEC Hackathon and placed 3rd out of more than 150 teams. 
From forming the team on Slack to designing an impact tracker, foodbank map and natural-language charity recommender, this post reflects on collaboration, problem solving and what it takes to succeed in a global hackathon.",,https://drive.google.com/open?id=1o4byZahHg6KpqvKlJU_IJ0RKZ-nmcMnw diff --git a/tools/blog_automation/check_for_new_blogs.py b/tools/blog_automation/check_for_new_blogs.py new file mode 100644 index 00000000..9173467a --- /dev/null +++ b/tools/blog_automation/check_for_new_blogs.py @@ -0,0 +1,73 @@ +import os +import pandas as pd +from tools.blog_automation.blog_info_from_spreadsheet import dataframe_of_blog_spreadsheet_info + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) + +def load_previous_blog_info(): + """Load the previously saved blog info snapshot.""" + try: + df = pd.read_csv(os.path.join(SCRIPT_DIR, 'blog_info_snapshot.csv'), index_col=0) + return df + except FileNotFoundError: + print("ERROR: blog_info_snapshot.csv not found.") + print("Please run: python blog_information_from_spreadsheet.py save_blog_info_to_csv()") + return None + +def check_and_update_blogs(): + """ + Check for new blogs by comparing row counts with the snapshot. + Outputs GitHub Actions variables for has_new_rows and new_row_indices. 
+ + Returns: + tuple: (has_new_rows: bool, new_row_indices: list) + """ + df_previous = load_previous_blog_info() + if df_previous is None: + return False, [] + + df_current = dataframe_of_blog_spreadsheet_info() + + count_previous = df_previous.shape[0] + count_current = df_current.shape[0] + + new_row_indices = [] + + if count_current > count_previous: + # New rows added + new_blog_count = count_current - count_previous + print(f"Found {new_blog_count} new blog(s)") + + # Calculate the indices of new rows (0-indexed in the CSV, but the new rows start from count_previous) + new_row_indices = list(range(count_previous, count_current)) + print(f"New row indices: {new_row_indices}") + has_new_rows = True + elif count_current < count_previous: + print("WARNING: Current count is less than previous count. This is unexpected.") + has_new_rows = False + else: + print("No new blogs found.") + has_new_rows = False + + # Check if any existing rows have changed + if not (df_previous.eq(df_current[:count_previous])).all().all(): + for i, (idx, row) in enumerate(df_previous.iterrows()): + if i < len(df_current) and not row.equals(df_current.iloc[i]): + print(f"INFO: Row {i} has changed (not re-processing)") + + # Update snapshot for next run + df_current.to_csv(os.path.join(SCRIPT_DIR, 'blog_info_snapshot.csv')) + + return has_new_rows, new_row_indices + + +if __name__ == '__main__': + has_new_rows, new_row_indices = check_and_update_blogs() + + # Output for GitHub Actions + # Set has_new_rows output + print(f"::set-output name=has_new_rows::{str(has_new_rows).lower()}") + + # Set new_row_indices output (space-separated) + indices_str = ' '.join(map(str, new_row_indices)) + print(f"::set-output name=new_row_indices::{indices_str}") diff --git a/tools/blog_automation/requirements.txt b/tools/blog_automation/requirements.txt index fcc2a6e4..a99b0628 100644 --- a/tools/blog_automation/requirements.txt +++ b/tools/blog_automation/requirements.txt @@ -103,6 +103,7 @@ httpx==0.28.1 
huggingface_hub @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_29dc5yixdo/croot/huggingface_hub_1755770856782/work idna==3.10 importlib-metadata @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_5498c88e7n/croot/importlib_metadata-suite_1704813534254/work +iniconfig==2.3.0 ipykernel @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_f428_5tjvx/croot/ipykernel_1705933835534/work ipython @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_f02dzd_ff3/croot/ipython_1718287987043/work jaraco.classes @ file:///tmp/build/80754af9/jaraco.classes_1620983179379/work @@ -170,7 +171,7 @@ pillow==11.3.0 pkce @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_da285fiplp/croot/pkce_1690384839054/work pkginfo @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_21aly_cba3/croot/pkginfo_1715695988648/work platformdirs @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_a8u4fy8k9o/croot/platformdirs_1692205661656/work -pluggy @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_croot-w6jyveby/pluggy_1648109277227/work +pluggy==1.6.0 ply==3.11 preshed==3.0.10 prometheus-client @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_25sgeyk0j5/croots/recipe/prometheus_client_1659455103277/work @@ -200,6 +201,7 @@ PyQt5-sip @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_ PyQtWebEngine==5.15.6 pyshp @ file:///home/conda/feedstock_root/build_artifacts/pyshp_1764355200360/work PySocks @ file:///Users/ktietz/ci_310/pysocks_1643961536721/work +pytest==9.0.2 python-dateutil @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_66ud1l42_h/croot/python-dateutil_1716495741162/work python-dotenv @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_3ahu1_p9lb/croot/python-dotenv_1669132571851/work python-json-logger @ 
file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_c3baq2ko4j/croot/python-json-logger_1683823815343/work diff --git a/tools/blog_automation/save_blog_as_html.py b/tools/blog_automation/save_blog_as_html.py deleted file mode 100644 index f391dfb8..00000000 --- a/tools/blog_automation/save_blog_as_html.py +++ /dev/null @@ -1,192 +0,0 @@ -import shutil -from google.oauth2 import service_account -from googleapiclient.discovery import build -import os -import markdown -import argparse -from pathlib import Path -from googleapiclient.errors import HttpError -import datetime as dt -import pandas as pd -import re - -# --- Configuration --- -SERVICE_ACCOUNT_FILE = 'service_account_key.json' -SPREADSHEET_ID = '1Pje2qOn23OgtAyhjqKwQFYcaEAE3gAy5f3T_5LCgA2o' -YAML_HEADER = '''--- -layout: post -title: [TITLE] -date: [DATE] -author_name: [AUTHOR] -author_role: [AUTHOR ROLE] -image: [IMG PATH] -image_source: [IMG SOURCE (optional)] -description: [BLOG DESCRIPTION] -category: [CATEGORY] ---- -''' - -# TODO: Use information from spreadsheet with optional doc_ID param - -def _current_directory(): - return Path(__file__).resolve().parent - -def drive_connection(): - service_account_path = os.path.join(_current_directory(), SERVICE_ACCOUNT_FILE) - if not os.path.exists(service_account_path): - print(f"ERROR: Service account key file '{service_account_path}' not found.\n" - "Please obtain your own Google service account key and place it at this path.\n" - "(Never commit this file to version control.)") - exit(1) - creds = service_account.Credentials.from_service_account_file( - service_account_path, - scopes=['https://www.googleapis.com/auth/drive.readonly'] - ) - drive = build('drive', 'v3', credentials=creds) - return drive - -DRIVE = drive_connection() - -def get_blog_info_from_spreadsheet(spreadsheet_id=SPREADSHEET_ID): - import gspread - import pandas as pd - - # 1) Authenticate using the service account JSON - gc = 
gspread.service_account(filename="service_account_key.json") - - # 2) Open the spreadsheet by its ID - spreadsheet_id = SPREADSHEET_ID - sh = gc.open_by_key(spreadsheet_id) - - # 3) Select a worksheet/tab (by gid or title) - worksheet = sh.worksheet("Form Responses 1") - - # 4) Get data - data = worksheet.get_all_records() - - # 5) Convert to a pandas DataFrame - df = pd.DataFrame(data) - return df - -def _posts_directory(): - # Path to the directory where the script itself is located - script_dir = _current_directory() - - # Construct the path relative to the script’s location - posts_dir = (script_dir / "../../_posts").resolve() - - return posts_dir - -def _today_date_str(): - return dt.date.today().isoformat() - -def _create_blog_filename_with_date(doc_name, date_str): - formatted_blog_title = doc_name.lower().replace(' ', '-').strip() - filename = f"{date_str}-{formatted_blog_title}" - return filename - -def _update_yaml_header_with_spreadsheet_info(yaml_header): - spreadsheet_info = get_blog_info_from_spreadsheet(drive=DRIVE).iloc[-1].to_dict() - try: - author_name = spreadsheet_info['What is your full name? '] - author_role = spreadsheet_info[ - 'What is your position / company you are working at / associated with? ' - ] - description = spreadsheet_info['Please provide a short description of your writing idea / blog post? '] - source = spreadsheet_info[ - 'Please provide a source of how you obtained/created the infographic/photo/picture used.' - ] - yaml_header = yaml_header.replace('[AUTHOR]', author_name) - yaml_header = yaml_header.replace('[AUTHOR ROLE]', author_role) - yaml_header = yaml_header.replace('[DESCRIPTION]', description) - yaml_header = yaml_header.replace('[SOURCE]', source) - return yaml_header - except KeyError as error: - print(f'Unable to find relevant spreadsheet field. 
Please check the spreadsheet carefully.\n{error}') - -def export_blog_as_html(document_id, date=None, drive=DRIVE): - if date is None: - date = _today_date_str() - - try: - # 1. Get document name from Drive - doc_metadata = drive.files().get(fileId=document_id, fields='name').execute() - doc_name = doc_metadata.get('name', 'exported_blog') - blog_filename = _create_blog_filename_with_date(doc_name, date) - - # 2. Export as Markdown - request = drive.files().export_media( - fileId=document_id, - mimeType='text/markdown' - ) - md_bytes = request.execute() - except HttpError as error: - if error.resp.status == 404: - raise FileNotFoundError(f"Document ID '{document_id}' not found.") from error - else: - raise - - # 3. Convert Markdown to HTML and save to local file - import re - html = markdown.markdown(md_bytes.decode('utf-8')) - # Remove tags from inside heading tags (e.g.

Heading

->

Heading

) - html = re.sub(r'(.+?)', r'\2', html) - - # Remove the first heading if present (e.g.

...

or

...

at the start) - html = re.sub(r'^.*?\s*', '', html, flags=re.DOTALL) - - # Wrap the body in
- html_body = f'
\n{html}\n
' - - # YAML front matter - yaml_header = YAML_HEADER.replace('[TITLE]', doc_name.title()).replace('[DATE]', date) - yaml_header = _update_yaml_header_with_spreadsheet_info(yaml_header) - - final_html = yaml_header + '\n' + html_body - - posts_dir = _posts_directory() - filename = f"{posts_dir}/{blog_filename}.html" - with open(filename, 'w', encoding='utf-8') as f: - f.write(final_html) - - print(f"Saved HTML to: {filename}") - return blog_filename - -def download_blog_image(spreadsheet_info): - blog_image_drive_link = spreadsheet_info['Submit your blog cover image'] - file_id = re.search(r'drive\.google\.com/file/d/([^/]+)/', blog_image_drive_link).group(1) - # Download the image file - try: - request = DRIVE.files().get_media(fileId=file_id) - image_data = request.execute() - # Save the image locally - image_filename = f"blog_image_{file_id}.jpg" - with open(image_filename, 'wb') as img_file: - img_file.write(image_data) - return image_filename - except HttpError as error: - print(f"Error downloading image: {error}") - return None - -def copy_image_to_blog_assets(image_filename, blog_filename): - assets_dir = Path(__file__).resolve().parent.parent.parent / 'assets' / 'images' / 'blog' - assets_dir.mkdir(parents=True, exist_ok=True) - date_prefix = blog_filename.split('-')[0] - new_image_filename = f"{date_prefix}-{image_filename}" - new_image_path = assets_dir / new_image_filename - shutil.copy(image_filename, new_image_path) - return f"/assets/images/blog/{new_image_filename}" - -def export_blog_with_image(document_id): - spreadsheet_info = get_blog_info_from_spreadsheet(drive=DRIVE) - blog_filename = export_blog_as_html(document_id, spreadsheet_info) - image_filename = download_blog_image(spreadsheet_info) - copy_image_to_blog_assets(image_filename, blog_filename) - -if __name__ == "__main__": - # To run script: `python export_blog.py --date ` - parser = argparse.ArgumentParser(description="Export a Google Doc as HTML with custom formatting.") - 
parser.add_argument("doc_id", help="The Google Doc ID to export.") - parser.add_argument("--date", help="Date for the blog post (YYYY-MM-DD). If not provided, uses today.", default=None) - args = parser.parse_args() - export_blog_as_html(args.doc_id, args.date) diff --git a/tools/blog_automation/test_blog_exporter.py b/tools/blog_automation/test_blog_exporter.py new file mode 100644 index 00000000..51594371 --- /dev/null +++ b/tools/blog_automation/test_blog_exporter.py @@ -0,0 +1,26 @@ +import pytest +from blog_exporter import drive_connection, download_image_and_copy_to_repo +import os +from pathlib import Path + +@pytest.mark.parametrize( + "example_image", + ["https://drive.google.com/open?id=1o4byZahHg6KpqvKlJU_IJ0RKZ-nmcMnw", + "https://drive.google.com/file/d/1o4byZahHg6KpqvKlJU_IJ0RKZ-nmcMnw/view"] +) +def test_download_image_and_copy_to_repo(example_image): + blog_filename = "test_blog_image" + blog_assets_dir = Path(__file__).resolve().parent.parent.parent / 'assets' / 'images' / 'blog' + # if ../assets/images/blog/{blog_filename}.png exists, then remove it + if os.path.exists(blog_assets_dir / f'{blog_filename}.png'): + os.remove(blog_assets_dir / f'{blog_filename}.png') + + drive = drive_connection() + + image_path_relative = download_image_and_copy_to_repo( + image_link=example_image, blog_filename=blog_filename, drive=drive) + + assert image_path_relative == f"/assets/images/blog/{blog_filename}.png" + + # assert that blog_filename exists + os.remove(blog_assets_dir / f'{blog_filename}.png') \ No newline at end of file From 6aea9cbf1133f9167cc33c2d64c0c4ee7d1700fa Mon Sep 17 00:00:00 2001 From: Silke Nodwell Date: Thu, 29 Jan 2026 07:33:28 +0000 Subject: [PATCH 5/5] Add pytest for the image download --- tools/blog_automation/test_blog_exporter.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/blog_automation/test_blog_exporter.py b/tools/blog_automation/test_blog_exporter.py index 51594371..39df544a 100644 --- 
a/tools/blog_automation/test_blog_exporter.py +++ b/tools/blog_automation/test_blog_exporter.py @@ -5,8 +5,11 @@ @pytest.mark.parametrize( "example_image", - ["https://drive.google.com/open?id=1o4byZahHg6KpqvKlJU_IJ0RKZ-nmcMnw", - "https://drive.google.com/file/d/1o4byZahHg6KpqvKlJU_IJ0RKZ-nmcMnw/view"] + [ + "https://drive.google.com/open?id=1o4byZahHg6KpqvKlJU_IJ0RKZ-nmcMnw", + "https://drive.google.com/file/d/1o4byZahHg6KpqvKlJU_IJ0RKZ-nmcMnw/view", + "https://drive.google.com/open?id=1DF08PAjvFPBv8ZGigjwiaFn1JP8TUHg7" + ] ) def test_download_image_and_copy_to_repo(example_image): blog_filename = "test_blog_image"