diff --git a/dev/general_scan_v5.ipynb b/dev/general_scan_v5.ipynb index c9c9dc5..81b7323 100644 --- a/dev/general_scan_v5.ipynb +++ b/dev/general_scan_v5.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -77,7 +77,7 @@ " 'yaml identifier': 'Electricity'}}" ] }, - "execution_count": 9, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -93,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -154,7 +154,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -175,10 +175,16 @@ " 'method_3': {'object': Brightway2 Method: selected LCI results: resource: land occupation,\n", " 'method name': ('selected LCI results', 'resource', 'land occupation'),\n", " 'short name': 'land occupation',\n", - " 'unit': 'square meter-year'}}" + " 'unit': 'square meter-year'},\n", + " 'method_4': {'object': Brightway2 Method: EN15804: inventory indicators ISO21930: use of net fresh water,\n", + " 'method name': ('EN15804',\n", + " 'inventory indicators ISO21930',\n", + " 'use of net fresh water'),\n", + " 'short name': 'use of net fresh water',\n", + " 'unit': 'cubic meter'}}" ] }, - "execution_count": 12, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -189,14 +195,14 @@ "finder.find_and_create_method(criteria=['IPCC', '2013', 'GWP100'], exclude=['no LT'])\n", "finder.find_and_create_method(criteria=['EN15804','Cumulative', 'non-renewable' ])\n", "finder.find_and_create_method(criteria=['land occupation','selected'])\n", - "# finder.find_and_create_method(criteria=['EN15804','fresh water'])\n", + "finder.find_and_create_method(criteria=['EN15804','fresh water'])\n", "method_dict=finder.get_all_methods()\n", "method_dict" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -236,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -246,6 +252,8 @@ "Omitting activity name common prefix: 'cement production, '\n", "Omitting activity name common prefix: 'cement production, '\n", "Omitting activity name common prefix: 'cement production, '\n", + "Omitting activity name common prefix: 'cement production, '\n", + "Omitting activity name common prefix: 'electricity production, at biomass-fired IGCC power '\n", "Omitting activity name common prefix: 'electricity production, at biomass-fired IGCC power '\n", "Omitting activity name common prefix: 'electricity production, at biomass-fired IGCC power '\n", "Omitting activity name common prefix: 'electricity production, at biomass-fired IGCC power '\n" @@ -261,20 +269,369 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['Cement_global_warming_potential_(gwp100)', 'Cement_cumulative_energy_demand_-_non-renewable_energy_resources', 'Cement_land_occupation', 'Cement_use_of_net_fresh_water'])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "scores_dict['Cement']['lca_scores'].keys()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
activityproductlocationunitmethodmethod unittotal6511: Road transport services of freight15310: Natural sands8614: Support and operation services to forestry and logging88311: Wood manufacturing services37510: Non-refractory mortars and concretes12020: Natural gas, liquefied or in the gaseous state15320: Pebbles, gravel, broken or crushed stone, macadam; granules, chippings and powder of stoneother
8plantUKRkilowatt houruse of net fresh watercubic meter0.0001210.0000240.0000180.0000170.0000070.0000040.000000-0.0000250.000016
7plantTURkilowatt houruse of net fresh watercubic meter0.0001210.0000240.0000180.0000170.0000070.0000040.000003-0.0000250.000013
5plantRUSkilowatt houruse of net fresh watercubic meter0.0001200.0000240.0000180.0000170.0000070.0000040.000000-0.0000250.000015
2plantCHNkilowatt houruse of net fresh watercubic meter0.0001200.0000240.0000180.0000170.0000070.0000040.000000-0.0000250.000014
6plantSAFkilowatt houruse of net fresh watercubic meter0.0001190.0000240.0000170.0000170.0000070.0000040.000000-0.0000240.000016
4plantRSAMkilowatt houruse of net fresh watercubic meter0.0001170.0000240.0000170.0000170.0000070.0000040.000000-0.0000240.000014
3plantKORkilowatt houruse of net fresh watercubic meter0.0001160.0000230.0000170.0000170.0000070.0000030.000000-0.0000240.000015
1plantCEUkilowatt houruse of net fresh watercubic meter0.0001160.0000230.0000170.0000170.0000070.0000030.000003-0.0000240.000012
9plantUSAkilowatt houruse of net fresh watercubic meter0.0001150.0000230.0000170.0000170.0000070.0000030.000000-0.0000240.000014
0plantBRAkilowatt houruse of net fresh watercubic meter0.0001040.0000250.0000020.0000170.0000070.0000040.000000-0.0000240.000015
\n", + "
" + ], + "text/plain": [ + " activity product location unit method \\\n", + "8 plant UKR kilowatt hour use of net fresh water \n", + "7 plant TUR kilowatt hour use of net fresh water \n", + "5 plant RUS kilowatt hour use of net fresh water \n", + "2 plant CHN kilowatt hour use of net fresh water \n", + "6 plant SAF kilowatt hour use of net fresh water \n", + "4 plant RSAM kilowatt hour use of net fresh water \n", + "3 plant KOR kilowatt hour use of net fresh water \n", + "1 plant CEU kilowatt hour use of net fresh water \n", + "9 plant USA kilowatt hour use of net fresh water \n", + "0 plant BRA kilowatt hour use of net fresh water \n", + "\n", + " method unit total 6511: Road transport services of freight \\\n", + "8 cubic meter 0.000121 0.000024 \n", + "7 cubic meter 0.000121 0.000024 \n", + "5 cubic meter 0.000120 0.000024 \n", + "2 cubic meter 0.000120 0.000024 \n", + "6 cubic meter 0.000119 0.000024 \n", + "4 cubic meter 0.000117 0.000024 \n", + "3 cubic meter 0.000116 0.000023 \n", + "1 cubic meter 0.000116 0.000023 \n", + "9 cubic meter 0.000115 0.000023 \n", + "0 cubic meter 0.000104 0.000025 \n", + "\n", + " 15310: Natural sands \\\n", + "8 0.000018 \n", + "7 0.000018 \n", + "5 0.000018 \n", + "2 0.000018 \n", + "6 0.000017 \n", + "4 0.000017 \n", + "3 0.000017 \n", + "1 0.000017 \n", + "9 0.000017 \n", + "0 0.000002 \n", + "\n", + " 8614: Support and operation services to forestry and logging \\\n", + "8 0.000017 \n", + "7 0.000017 \n", + "5 0.000017 \n", + "2 0.000017 \n", + "6 0.000017 \n", + "4 0.000017 \n", + "3 0.000017 \n", + "1 0.000017 \n", + "9 0.000017 \n", + "0 0.000017 \n", + "\n", + " 88311: Wood manufacturing services \\\n", + "8 0.000007 \n", + "7 0.000007 \n", + "5 0.000007 \n", + "2 0.000007 \n", + "6 0.000007 \n", + "4 0.000007 \n", + "3 0.000007 \n", + "1 0.000007 \n", + "9 0.000007 \n", + "0 0.000007 \n", + "\n", + " 37510: Non-refractory mortars and concretes \\\n", + "8 0.000004 \n", + "7 0.000004 \n", + "5 0.000004 \n", + "2 0.000004 \n", + "6 0.000004 \n", + "4 0.000004 \n", + "3 0.000003 \n", + "1 0.000003 \n", + "9 0.000003 \n", + "0 0.000004 \n", + "\n", + " 12020: Natural gas, liquefied or in the gaseous state \\\n", + "8 0.000000 \n", + "7 0.000003 \n", + "5 0.000000 \n", + "2 0.000000 \n", + "6 0.000000 \n", + "4 0.000000 \n", + "3 0.000000 \n", + "1 0.000003 \n", + "9 0.000000 \n", + "0 0.000000 \n", + "\n", + " 15320: Pebbles, gravel, broken or crushed stone, macadam; granules, chippings and powder of stone \\\n", + "8 -0.000025 \n", + "7 -0.000025 \n", + "5 -0.000025 \n", + "2 -0.000025 \n", + "6 -0.000024 \n", + "4 -0.000024 \n", + "3 -0.000024 \n", + "1 -0.000024 \n", + "9 -0.000024 \n", + "0 -0.000024 \n", + "\n", + " other \n", + "8 0.000016 \n", + "7 0.000013 \n", + "5 0.000015 \n", + "2 0.000014 \n", + "6 0.000016 \n", + "4 0.000014 \n", + "3 0.000015 \n", + "1 0.000012 \n", + "9 0.000014 \n", + "0 0.000015 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "scores_dict['Electricity']['lca_scores'].keys()" + "scores_dict['Electricity']['lca_scores']['Electricity_use_of_net_fresh_water']" ] }, { @@ -288,11 +645,11 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "def add_statistics(df, column_name='total'):\n", + "def add_statistics(df, column_name='total'): #call fctn data mainpulation?\n", "\n", " #Need a rank row to plot the total LCA scores in descending order (satter opepyxl function takes in non categorial values)\n", " df['rank'] = df[column_name].rank(method=\"first\", ascending=\"False\")\n", @@ -315,48 +672,250 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ - "def sector_lca_scores_to_excel(scores_dict, excel_file_name):\n", + "import re\n", + "import pandas as pd\n", + "\n", + "def clean_column_labels(df):\n", + " # Function to remove numbers and colon from column names\n", + " def clean_label(label):\n", + " if label is None:\n", + " return 'Unnamed' # or return 'Unnamed' if you prefer a placeholder\n", + " return re.sub(r'^\\d+:\\s*', '', str(label))\n", + "\n", + " # Apply the cleaning function to all column names\n", + " df.columns = [clean_label(col) for col in df.columns]\n", + "\n", + " return df\n", + "\n", + "# Example usage:\n", + "# df = pd.read_csv('your_file.csv') # or however you're loading your DataFrame\n", + "# cleaned_df = clean_column_labels(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 193, + "metadata": {}, + "outputs": [], + "source": [ + "def add_sector_marker(df, sector):\n", + " # Add sector marker column\n", + " df['sector']=str(sector) # potentially remove!\n", + " # Reorder the columns to move 'sector' after 'product'\n", + " columns = list(df.columns)\n", + " product_index = columns.index('product')\n", + " # Insert 'sector' after 'product'\n", + " columns.insert(product_index + 1, columns.pop(columns.index('sector')))\n", + " # Reassign the DataFrame with the new column order\n", + " df = df[columns]\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 197, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "\n", + "def find_first_input_column(df):\n", + " pattern = r'^\\d+:\\s*' # Regular expression pattern to match \"Number: Name\" or \"Number Name\"\n", + "\n", + " for idx, column in enumerate(df.columns):\n", + " if column is not None and re.match(pattern, column):\n", + " return idx\n", + "\n", + " return None" + ] + }, + { + "cell_type": "code", + "execution_count": 198, + "metadata": {}, + "outputs": [], + "source": [ + "def sector_lca_scores_to_excel_and_column_positions(scores_dict, excel_file_name):\n", + " \"\"\" \n", + " What it does:\n", + " - Creates a dataframe for each method and sector from the lca scores dictionary\n", + " - Before storing each df in a worksheet in an excel file it:\n", + " - shortens the column labels of the input (removing cpc code)\n", + " - adds a sector name marker for keeping track in excel (when plotting can use it for labeling)\n", + " - adds statistics for plotting\n", + " - creates a dictionary which holds the indexes to the columns we need to call for plotting, this makes it dynamic. Otherwise need to hardcode index column number for openpxyl.\n", + " What it returns:\n", + " - Returns the index positions dictionary where the key is \"sector_method\"\n", + " - Creates excel file as defined by user\n", + " \"\"\"\n", + "\n", " # Prepare to save each LCA score table to a different worksheet in the same Excel file\n", " excel_file = excel_file_name\n", + " column_positions = {} #stores the indexes of columns for plotting\n", " with pd.ExcelWriter(excel_file, engine='openpyxl') as writer:\n", " for sector in scores_dict.keys():\n", " lca_scores = scores_dict[sector]['lca_scores']\n", " for method, table in lca_scores.items():\n", " # Create a DataFrame for the current LCA score table\n", " df = pd.DataFrame(table)\n", - " # Add the index as a new column at the beginning\n", - " df.insert(0, 'Index', df.index)\n", - " # Add statistics to the DataFrame\n", + "\n", + " # Add sector marker\n", + " df = add_sector_marker(df, sector) #!! ADJUST POSITION\n", + "\n", + " # Add statistics to the DataFrame\n", " df = add_statistics(df)\n", - " # Add sector marker column\n", - " df['sector']=str(sector) # potentially remove!\n", - " # Reorder the columns to move 'sector' after 'product'\n", - " columns = list(df.columns)\n", - " product_index = columns.index('product')\n", - " # Insert 'sector' after 'product'\n", - " columns.insert(product_index + 1, columns.pop(columns.index('sector')))\n", - " # Reassign the DataFrame with the new column order\n", - " df = df[columns]\n", + "\n", + " # Get the index values of columns\n", + " columns_of_interest = [\"total\", \"rank\", \"mean\", \"2std_abv\", \"2std_blw\", \"q1\", \"q3\", \"method\", \"method unit\"]\n", + " positions = {col: df.columns.get_loc(col) for col in columns_of_interest if col in df.columns}\n", + " column_positions[method] = positions\n", + "\n", + " # Find the first input column and add it to the positions dictionary\n", + " first_input_col_index = find_first_input_column(df)\n", + " if first_input_col_index is not None:\n", + " positions[\"first_input\"] = first_input_col_index\n", + " print(first_input_col_index)\n", + " # Store the positions for this method\n", + " column_positions[method] = positions\n", + "\n", + " # remove cpc from input labels\n", + " df = clean_column_labels(df)\n", "\n", " # Generate a worksheet name\n", " worksheet_name = f\"{method}\" #f\"{sector}_{method}\"\n", " if len(worksheet_name) > 31:\n", " worksheet_name = worksheet_name[:31]\n", + " \n", " # Save the DataFrame to the Excel file in a new worksheet\n", - " df.to_excel(writer, sheet_name=worksheet_name, index=False)" + " df.to_excel(writer, sheet_name=worksheet_name, index=False)\n", + " return column_positions" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 199, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "14\n", + "14\n", + "15\n", + "16\n", + "15\n", + "14\n", + "14\n", + "14\n" + ] + } + ], + "source": [ + "index_positions=sector_lca_scores_to_excel_and_column_positions(scores_dict, 'output_v22_3.xlsx') #the variable holds the index positions of the columns!!" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Cement_global_warming_potential_(gwp100)': {'total': 7,\n", + " 'rank': 8,\n", + " 'mean': 9,\n", + " '2std_abv': 10,\n", + " '2std_blw': 11,\n", + " 'q1': 12,\n", + " 'q3': 13,\n", + " 'method': 5,\n", + " 'method unit': 6,\n", + " 'first_input': 10},\n", + " 'Cement_cumulative_energy_demand_-_non-renewable_energy_resources': {'total': 7,\n", + " 'rank': 8,\n", + " 'mean': 9,\n", + " '2std_abv': 10,\n", + " '2std_blw': 11,\n", + " 'q1': 12,\n", + " 'q3': 13,\n", + " 'method': 5,\n", + " 'method unit': 6,\n", + " 'first_input': 10},\n", + " 'Cement_land_occupation': {'total': 7,\n", + " 'rank': 8,\n", + " 'mean': 9,\n", + " '2std_abv': 10,\n", + " '2std_blw': 11,\n", + " 'q1': 12,\n", + " 'q3': 13,\n", + " 'method': 5,\n", + " 'method unit': 6,\n", + " 'first_input': 10},\n", + " 'Cement_use_of_net_fresh_water': {'total': 7,\n", + " 'rank': 8,\n", + " 'mean': 9,\n", + " '2std_abv': 10,\n", + " '2std_blw': 11,\n", + " 'q1': 12,\n", + " 'q3': 13,\n", + " 'method': 5,\n", + " 'method unit': 6,\n", + " 'first_input': 10},\n", + " 'Electricity_global_warming_potential_(gwp100)': {'total': 7,\n", + " 'rank': 8,\n", + " 'mean': 9,\n", + " '2std_abv': 10,\n", + " '2std_blw': 11,\n", + " 'q1': 12,\n", + " 'q3': 13,\n", + " 'method': 5,\n", + " 'method unit': 6,\n", + " 'first_input': 10},\n", + " 'Electricity_cumulative_energy_demand_-_non-renewable_energy_resources': {'total': 7,\n", + " 'rank': 8,\n", + " 'mean': 9,\n", + " '2std_abv': 10,\n", + " '2std_blw': 11,\n", + " 'q1': 12,\n", + " 'q3': 13,\n", + " 'method': 5,\n", + " 'method unit': 6,\n", + " 'first_input': 10},\n", + " 'Electricity_land_occupation': {'total': 7,\n", + " 'rank': 8,\n", + " 'mean': 9,\n", + " '2std_abv': 10,\n", + " '2std_blw': 11,\n", + " 'q1': 12,\n", + " 'q3': 13,\n", + " 'method': 5,\n", + " 'method unit': 6,\n", + " 'first_input': 10},\n", + " 'Electricity_use_of_net_fresh_water': {'total': 7,\n", + " 'rank': 8,\n", + " 'mean': 9,\n", + " '2std_abv': 10,\n", + " '2std_blw': 11,\n", + " 'q1': 12,\n", + " 'q3': 13,\n", + " 'method': 5,\n", + " 'method unit': 6,\n", + " 'first_input': 10}}" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "lca_scores_in_excel=sector_lca_scores_to_excel(scores_dict, 'output_v54_2.xlsx')" + "index_positions" ] }, { @@ -381,7 +940,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 200, "metadata": {}, "outputs": [], "source": [ @@ -410,33 +969,51 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 135, "metadata": {}, "outputs": [], "source": [ - "worksheet_dict=categorize_sheets_by_sector('output_v54_2.xlsx')" + "worksheet_dict=categorize_sheets_by_sector('output_v8_3.xlsx')" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'total': 7, 'rank': 8, 'mean': 9, 'q1': 12, 'q3': 13}" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "positions = index_positions.get(\"Cement_global_warming_potential_(gwp100)\", {})\n", + "positions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Function to plot categorical scatter plots (Note: Axis can't take on categorical labels)" + "Dot Plot" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 220, "metadata": {}, "outputs": [], "source": [ - "from openpyxl.chart import ScatterChart, Reference, Series\n", "from openpyxl import load_workbook\n", - "from openpyxl.drawing.text import CharacterProperties\n", - "from openpyxl.chart.label import DataLabelList\n", - "\n", + "from openpyxl.chart import ScatterChart, Reference, Series\n", "\n", - "def dot_plots(filepath_workbook, worksheet_dict):\n", + "def dot_plots(filepath_workbook, worksheet_dict, index_positions):\n", " \n", " # Load the workbook\n", " wb = load_workbook(filepath_workbook)\n", @@ -445,54 +1022,74 @@ " for sector, worksheet_names in worksheet_dict.items():\n", " \n", " # Create or get the chart sheet for the current sector\n", - " chart_sheet_name = f\"{sector}_dot_charts\"\n", + " chart_sheet_name = f\"{sector}_charts\"\n", " if chart_sheet_name in wb.sheetnames:\n", " ws_charts = wb[chart_sheet_name]\n", " else:\n", - " ws_charts = wb.create_sheet(chart_sheet_name)\n", + " ws_charts = wb.create_sheet(chart_sheet_name) \n", " \n", " # Initial position for the first chart\n", " current_row = 1 # Start placing charts from row 1\n", - " chart_height = 21 # Number of rows a chart occupies, adjust as needed\n", + " current_col = 1 # Start placing charts from column 1\n", + " chart_height = 30 # Number of rows a chart occupies\n", + " chart_width = 12 # Number of columns a chart occupies\n", + " charts_per_row = 3 # Number of charts per row\n", " \n", " # Iterate over each worksheet name in the current sector\n", - " for worksheet_name in worksheet_names:\n", + " for i, worksheet_name in enumerate(worksheet_names):\n", " ws = wb[worksheet_name]\n", "\n", " # Find min_row, max_row and max_column\n", " max_row = ws.max_row\n", - " print(max_row)\n", " max_column = ws.max_column\n", - " min_row=1\n", - "\n", - " #define columns\n", - " total_col=9\n", - " rank_col=10\n", - " index_col=1\n", - " mean_col=11\n", - " std_adv_col=12\n", - " std_blw_col=13\n", - " q1_col=14\n", - " q3_col=15\n", + " min_row = 1\n", + "\n", + " # Find the key in index_positions that contains worksheet_name\n", + " matching_key = None\n", + " for key in index_positions.keys():\n", + " if worksheet_name in key:\n", + " matching_key = key\n", + " break\n", + "\n", + " if not matching_key:\n", + " print(f\"Warning: No matching key found for worksheet '{worksheet_name}'. Skipping...\")\n", + " continue\n", + "\n", + " # Retrieve the column positions from the index_positions dictionary\n", + " positions = index_positions[matching_key]\n", + " total_col = positions.get(\"total\", None) + 1\n", + " rank_col = positions.get(\"rank\", None) + 1\n", + " mean_col = positions.get(\"mean\", None) + 1\n", + " std_adv_col = positions.get(\"2std_abv\", None) + 1\n", + " std_blw_col = positions.get(\"2std_blw\", None) + 1\n", + " q1_col = positions.get(\"q1\", None) + 1\n", + " q3_col = positions.get(\"q3\", None) + 1\n", + " method_col = positions.get(\"method\", None) + 1\n", + " method_unit_col = positions.get(\"method unit\", None) + 1\n", + " \n", + " # Ensure that all required columns are present\n", + " if None in [total_col, rank_col, mean_col, std_adv_col, std_blw_col, q1_col, q3_col, method_col, method_unit_col]:\n", + " print(f\"Warning: Missing columns in worksheet '{worksheet_name}' for sector '{sector}'. Skipping...\")\n", + " continue\n", " \n", " # Create a ScatterChart (or other chart type as needed)\n", " chart = ScatterChart()\n", "\n", " # Chart titles\n", - " chart.title = f\"{ws['G2'].value} LCA scores for {sector} sector\"\n", - " chart.y_axis.title = f\"{ws['H2'].value}\"\n", + " method_value = ws.cell(row=2, column=method_col).value\n", + " chart.title = f\"{method_value} LCA scores for {sector} sector\" \n", + " \n", + " method_unit_value = ws.cell(row=2, column=method_unit_col).value\n", + " chart.y_axis.title = f\"{method_unit_value}\"\n", " chart.x_axis.title = 'activity rank'\n", " # Avoid overlap\n", " chart.title.overlay = False\n", " chart.x_axis.title.overlay = False\n", " chart.y_axis.title.overlay = False \n", "\n", - "\n", " # Define the data range for the chart\n", " y_values = Reference(ws, min_col=total_col, min_row=min_row, max_row=max_row)\n", - " print(y_values)\n", " x_values = Reference(ws, min_col=rank_col, min_row=min_row, max_row=max_row)\n", - " print(x_values)\n", "\n", " # Create a series and add it to the chart\n", " series = Series(y_values, x_values, title_from_data=True)\n", @@ -504,12 +1101,11 @@ " series.marker.size = 5\n", " series.graphicalProperties.line.noFill = True\n", "\n", - "\n", " # ADJUST X-AXIS\n", " chart.x_axis.tickLblPos = \"low\"\n", - " chart.x_axis.majorGridlines = None\n", - " chart.x_axis.tickMarkSkip = 0 # Show all tick marks\n", - " chart.x_axis.tickLblSkip = 0 # Show all labels\n", + " chart.x_axis.majorGridlines = None \n", + " chart.x_axis.tickMarkSkip = 1 # Show all tick marks, this adresses the tick lines \n", + " chart.x_axis.tickLblSkip = 1 # Show all labels, doesnt work\n", "\n", " chart.x_axis.scaling.orientation = \"minMax\"\n", " chart.x_axis.crosses = \"autoZero\"\n", @@ -517,49 +1113,27 @@ " chart.x_axis.delete = False\n", "\n", " # ADJUST Y-AXIS\n", - "\n", - " # Calculate the min and max y-values to determine the axis range\n", - " y_values_list = [ws.cell(row=row, column=total_col).value for row in range(2,max_row+1)] #number of rows need to be defined based on dataframe\n", - " min_y_value = min(y_values_list)-min(y_values_list)*0.1\n", - " max_y_value = max(y_values_list)+ max(y_values_list)*0.1\n", - "\n", - " # Set y-axis range with some padding\n", - " padding = (max_y_value - min_y_value) * 0.1 # 10% padding on each side\n", - " chart.y_axis.scaling.min = min_y_value - padding\n", - " chart.y_axis.scaling.max = max_y_value + padding\n", - "\n", - " # Explicitly set tick labels to be visible\n", " chart.y_axis.tickLblPos = \"nextTo\" # Position the labels next to the tick marks\n", - " #chart.y_axis.majorGridlines = True # Ensure major gridlines are enabled (though you mentioned they're visible)\n", " chart.y_axis.delete = False # Ensure axis is not deleted\n", - " # Format the y-axis to show 2 decimal places\n", - " chart.y_axis.number_format = '0.000'\n", + " chart.y_axis.number_format = '0.00000'\n", + " chart.y_axis.majorGridlines = None \n", "\n", - "\n", - " # ---------\n", " # ADD STATS\n", - " # ---------\n", " # MEAN\n", - " # Add a new series to the chart for the mean line\n", " mean_y = Reference(ws, min_col=mean_col, min_row=min_row, max_row=max_row)\n", " mean_series = Series(mean_y, x_values, title_from_data=\"True\")\n", " chart.series.append(mean_series)\n", - "\n", - " # Customize the mean series to show as a line\n", " mean_series.marker.symbol = \"none\" # No markers, just a line\n", " mean_series.graphicalProperties.line.solidFill = \"FF0000\" # Red line for mean value\n", " mean_series.graphicalProperties.line.width = 10000 # Set line width (default units are EMUs)\n", "\n", " # IQR\n", - " # Add a new series to the chart for the iqr lines\n", - " iqr1= Reference(ws, min_col=q1_col, min_row=min_row, max_row=max_row)\n", - " iqr3= Reference(ws, min_col=q3_col, min_row=min_row, max_row=max_row)\n", + " iqr1 = Reference(ws, min_col=q1_col, min_row=min_row, max_row=max_row)\n", + " iqr3 = Reference(ws, min_col=q3_col, min_row=min_row, max_row=max_row)\n", " iqr1_series = Series(iqr1, x_values, title_from_data=\"True\")\n", " iqr3_series = Series(iqr3, x_values, title_from_data=\"True\")\n", " chart.series.append(iqr1_series)\n", " chart.series.append(iqr3_series)\n", - "\n", - " # Customize the iqr 1,3 series to show as a line\n", " iqr1_series.marker.symbol = \"none\" # No markers, just a line\n", " iqr3_series.marker.symbol = \"none\"\n", " iqr1_series.graphicalProperties.line.solidFill = \"6082B6\" # Blue line \n", @@ -568,15 +1142,12 @@ " iqr3_series.graphicalProperties.line.width = 10000 # Set line width (default units are EMUs)\n", "\n", " # STD\n", - " # Add a new series to the chart for the std lines\n", - " std_abv= Reference(ws, min_col=std_adv_col, min_row=min_row, max_row=max_row)\n", - " std_blw= Reference(ws, min_col=std_blw_col, min_row=min_row, max_row=max_row)\n", + " std_abv = Reference(ws, min_col=std_adv_col, min_row=min_row, max_row=max_row)\n", + " std_blw = Reference(ws, min_col=std_blw_col, min_row=min_row, max_row=max_row)\n", " std_abv_series = Series(std_abv, x_values, title_from_data=\"True\")\n", " std_blw_series = Series(std_blw, x_values, title_from_data=\"True\")\n", " chart.series.append(std_abv_series)\n", " chart.series.append(std_blw_series)\n", - "\n", - " # Customize the iqr 1,3 series to show as a line\n", " std_abv_series.marker.symbol = \"none\" # No markers, just a line\n", " std_blw_series.marker.symbol = \"none\"\n", " std_abv_series.graphicalProperties.line.solidFill = \"FFC300\" # yellow line\n", @@ -586,55 +1157,28 @@ "\n", " # Set legend position to the right of the plot area\n", " chart.legend.position = 'r' # 'r' for right\n", - " chart.legend.overlay= False\n", + " chart.legend.overlay = False\n", "\n", - " # Adjust chart dimensions and position to make space for the legend\n", - " chart.width = 18 # Example width, adjust as needed\n", - " chart.height = 10 # Example height, adjust as needed\n", + " # Adjust chart dimensions\n", + " chart.width = 20 # Width of the chart\n", + " chart.height = 14 # Height of the chart\n", "\n", - " # Add the chart to the chart worksheet\n", " # Calculate the position for this chart\n", - " position = f\"A{current_row}\"\n", + " position = ws_charts.cell(row=current_row, column=current_col).coordinate\n", " ws_charts.add_chart(chart, position)\n", " \n", - " # Update current_row to position the next chart below the current one\n", - " current_row += chart_height\n", + " # Update position for the next chart\n", + " current_col += chart_width +1 \n", + " if (i + 1) % charts_per_row == 0: # Move to the next row after placing `charts_per_row` charts\n", + " current_row += chart_height +1\n", + " current_col = 1 # Reset to the first column\n", "\n", - " wb.save(filepath_workbook)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "8\n", - "'Cement_global_warming_potential'!$I$1:$I$8\n", - "'Cement_global_warming_potential'!$J$1:$J$8\n", - "8\n", - "'Cement_cumulative_energy_demand'!$I$1:$I$8\n", - "'Cement_cumulative_energy_demand'!$J$1:$J$8\n", - "8\n", - "'Cement_land_occupation'!$I$1:$I$8\n", - "'Cement_land_occupation'!$J$1:$J$8\n", - "11\n", - "'Electricity_global_warming_pote'!$I$1:$I$11\n", - "'Electricity_global_warming_pote'!$J$1:$J$11\n", - "11\n", - "'Electricity_cumulative_energy_d'!$I$1:$I$11\n", - "'Electricity_cumulative_energy_d'!$J$1:$J$11\n", - "11\n", - "'Electricity_land_occupation'!$I$1:$I$11\n", - "'Electricity_land_occupation'!$J$1:$J$11\n" - ] - } - ], - "source": [ - "dot_plots('output_v54_2.xlsx', worksheet_dict=worksheet_dict)" + " # Move the chart sheet to the first position\n", + " wb._sheets.remove(ws_charts)\n", + " wb._sheets.insert(0, ws_charts)\n", + "\n", + " wb.save(filepath_workbook)\n", + " return current_row" ] }, { @@ -646,14 +1190,14 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 216, "metadata": {}, "outputs": [], "source": [ "from openpyxl import load_workbook\n", "from openpyxl.chart import BarChart, Reference\n", "\n", - "def stacked_bars(filepath_workbook, worksheet_dict):\n", + "def stacked_bars(filepath_workbook, worksheet_dict, current_row_dot_plot):\n", " # Load the workbook\n", " wb = load_workbook(filepath_workbook)\n", " \n", @@ -661,37 +1205,58 @@ " for sector, worksheet_names in worksheet_dict.items():\n", " \n", " # Create or get the chart sheet for the current sector\n", - " chart_sheet_name = f\"{sector}_stacked_bars\"\n", + " chart_sheet_name = f\"{sector}_charts\"\n", " if chart_sheet_name in wb.sheetnames:\n", " ws_charts = wb[chart_sheet_name]\n", " else:\n", " ws_charts = wb.create_sheet(chart_sheet_name)\n", " \n", " # Initial position for the first chart\n", - " current_row = 1 # Start placing charts from row 1\n", + " chart_height = 30 # Number of rows a chart occupies\n", + " chart_width = 12 # Number of columns a chart occupies\n", + " current_row = current_row_dot_plot + chart_height # Start placing charts from row where dot plots have left of\n", " current_col = 1 # Start placing charts from column 1\n", - " chart_height = 18 # Number of rows a chart occupies\n", - " chart_width = 15 # Number of columns a chart occupies\n", " charts_per_row = 3 # Number of charts per row\n", " \n", " # Iterate over each worksheet name in the current sector\n", " for i, worksheet_name in enumerate(worksheet_names):\n", " ws = wb[worksheet_name]\n", "\n", + " # Find the key in index_positions that contains worksheet_name\n", + " matching_key = None\n", + " for key in index_positions.keys():\n", + " if worksheet_name in key:\n", + " matching_key = key\n", + " break\n", + "\n", + " if not matching_key:\n", + " print(f\"Warning: No matching key found for worksheet '{worksheet_name}'. Skipping...\")\n", + " continue\n", + "\n", + " # Retrieve the column positions from the index_positions dictionary\n", + " positions = index_positions[matching_key]\n", + "\n", " # Find min_row, max_row and max_column\n", " max_row = ws.max_row\n", " max_column = ws.max_column\n", - " input_min_col = 16\n", + " input_min_col = positions.get(\"first_input\", None) + 1\n", + " rank_col = positions.get(\"rank\", None) + 1\n", + " method_col = positions.get(\"method\", None) + 1\n", + " method_unit_col = positions.get(\"method unit\", None) + 1\n", "\n", " chart = BarChart()\n", - " chart.type = \"col\"\n", + " chart.type = \"bar\"\n", " chart.style = 2\n", " chart.grouping = \"stacked\"\n", " chart.overlap = 100\n", "\n", " # Chart titles\n", - " chart.title = f\"{ws['G2'].value} input contributions to LCA scores for {sector} sector\"\n", - " chart.y_axis.title = f\"{ws['H2'].value}\"\n", + " method_value = ws.cell(row=2, column=method_col).value\n", + " chart.title = f\"{sector} sector inputs contributions to {method_value}\"\n", + "\n", + " method_unit_value = ws.cell(row=2, column=method_unit_col).value\n", + " chart.y_axis.title = f\"{method_unit_value}\"\n", + " \n", " chart.x_axis.title = 'activity index'\n", "\n", " # Avoid overlap\n", @@ -702,7 +1267,7 @@ "\n", " # Define data\n", " data = Reference(ws, min_col=input_min_col, min_row=1, max_row=max_row, max_col=max_column)\n", - " cats = Reference(ws, min_col=1, min_row=2, max_row=max_row)\n", + " cats = Reference(ws, min_col=rank_col, min_row=2, max_row=max_row)\n", "\n", " chart.add_data(data, titles_from_data=True)\n", " chart.set_categories(cats)\n", @@ -717,13 +1282,9 @@ " chart.y_axis.delete = False # Ensure axis is not deleted\n", " chart.y_axis.number_format = '0.000'\n", "\n", - " # Set legend position to the right of the plot area\n", - " chart.legend.position = 'r' # 'r' for right\n", - " chart.legend.overlay = False\n", - "\n", " # Adjust chart dimensions\n", - " chart.width = 15 # Example width, adjust as needed\n", - " chart.height = 25 # Example height, adjust as needed\n", + " chart.width = 20 # Width of the chart\n", + " chart.height = 14 # Height of the chart\n", "\n", " # Add the chart to the chart worksheet\n", " # Calculate the position for this chart\n", @@ -731,21 +1292,43 @@ " ws_charts.add_chart(chart, position)\n", " \n", " # Update position for the next chart\n", - " current_col += chart_width -4\n", + " current_col += chart_width +1\n", " if (i + 1) % charts_per_row == 0: # Move to the next row after placing `charts_per_row` charts\n", - " current_row += chart_height +2\n", + " current_row += chart_height +1\n", " current_col = 1 # Reset to the first column\n", + "\n", + " # Move the chart sheet to the first position\n", + " wb._sheets.remove(ws_charts)\n", + " wb._sheets.insert(0, ws_charts)\n", " \n", " wb.save(filepath_workbook)\n" ] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 221, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "14\n", + "14\n", + "15\n", + "16\n", + "15\n", + "14\n", + "14\n", + "14\n" + ] + } + ], "source": [ - "stacked_bars('output_v54_2.xlsx', worksheet_dict=worksheet_dict)" + "index_positions=sector_lca_scores_to_excel_and_column_positions(scores_dict, 'output_v26_3.xlsx') #the variable holds the index positions of the columns!!\n", + "worksheet_dict=categorize_sheets_by_sector('output_v26_3.xlsx')\n", + "current_row_dots = dot_plots('output_v26_3.xlsx', worksheet_dict=worksheet_dict, index_positions=index_positions)\n", + "stacked_bars('output_v26_3.xlsx', worksheet_dict=worksheet_dict, current_row_dot_plot=current_row_dots)\n" ] }, {