diff --git a/dev/general_scan_v5.ipynb b/dev/general_scan_v5.ipynb
index c9c9dc5..81b7323 100644
--- a/dev/general_scan_v5.ipynb
+++ b/dev/general_scan_v5.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -22,7 +22,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -51,7 +51,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -66,7 +66,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -77,7 +77,7 @@
" 'yaml identifier': 'Electricity'}}"
]
},
- "execution_count": 9,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -93,7 +93,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -118,7 +118,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -154,7 +154,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -175,10 +175,16 @@
" 'method_3': {'object': Brightway2 Method: selected LCI results: resource: land occupation,\n",
" 'method name': ('selected LCI results', 'resource', 'land occupation'),\n",
" 'short name': 'land occupation',\n",
- " 'unit': 'square meter-year'}}"
+ " 'unit': 'square meter-year'},\n",
+ " 'method_4': {'object': Brightway2 Method: EN15804: inventory indicators ISO21930: use of net fresh water,\n",
+ " 'method name': ('EN15804',\n",
+ " 'inventory indicators ISO21930',\n",
+ " 'use of net fresh water'),\n",
+ " 'short name': 'use of net fresh water',\n",
+ " 'unit': 'cubic meter'}}"
]
},
- "execution_count": 12,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -189,14 +195,14 @@
"finder.find_and_create_method(criteria=['IPCC', '2013', 'GWP100'], exclude=['no LT'])\n",
"finder.find_and_create_method(criteria=['EN15804','Cumulative', 'non-renewable' ])\n",
"finder.find_and_create_method(criteria=['land occupation','selected'])\n",
- "# finder.find_and_create_method(criteria=['EN15804','fresh water'])\n",
+ "finder.find_and_create_method(criteria=['EN15804','fresh water'])\n",
"method_dict=finder.get_all_methods()\n",
"method_dict"
]
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -236,7 +242,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -246,6 +252,8 @@
"Omitting activity name common prefix: 'cement production, '\n",
"Omitting activity name common prefix: 'cement production, '\n",
"Omitting activity name common prefix: 'cement production, '\n",
+ "Omitting activity name common prefix: 'cement production, '\n",
+ "Omitting activity name common prefix: 'electricity production, at biomass-fired IGCC power '\n",
"Omitting activity name common prefix: 'electricity production, at biomass-fired IGCC power '\n",
"Omitting activity name common prefix: 'electricity production, at biomass-fired IGCC power '\n",
"Omitting activity name common prefix: 'electricity production, at biomass-fired IGCC power '\n"
@@ -261,20 +269,369 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dict_keys(['Cement_global_warming_potential_(gwp100)', 'Cement_cumulative_energy_demand_-_non-renewable_energy_resources', 'Cement_land_occupation', 'Cement_use_of_net_fresh_water'])"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"scores_dict['Cement']['lca_scores'].keys()"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " activity | \n",
+ " product | \n",
+ " location | \n",
+ " unit | \n",
+ " method | \n",
+ " method unit | \n",
+ " total | \n",
+ " 6511: Road transport services of freight | \n",
+ " 15310: Natural sands | \n",
+ " 8614: Support and operation services to forestry and logging | \n",
+ " 88311: Wood manufacturing services | \n",
+ " 37510: Non-refractory mortars and concretes | \n",
+ " 12020: Natural gas, liquefied or in the gaseous state | \n",
+ " 15320: Pebbles, gravel, broken or crushed stone, macadam; granules, chippings and powder of stone | \n",
+ " other | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 8 | \n",
+ " plant | \n",
+ " | \n",
+ " UKR | \n",
+ " kilowatt hour | \n",
+ " use of net fresh water | \n",
+ " cubic meter | \n",
+ " 0.000121 | \n",
+ " 0.000024 | \n",
+ " 0.000018 | \n",
+ " 0.000017 | \n",
+ " 0.000007 | \n",
+ " 0.000004 | \n",
+ " 0.000000 | \n",
+ " -0.000025 | \n",
+ " 0.000016 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " plant | \n",
+ " | \n",
+ " TUR | \n",
+ " kilowatt hour | \n",
+ " use of net fresh water | \n",
+ " cubic meter | \n",
+ " 0.000121 | \n",
+ " 0.000024 | \n",
+ " 0.000018 | \n",
+ " 0.000017 | \n",
+ " 0.000007 | \n",
+ " 0.000004 | \n",
+ " 0.000003 | \n",
+ " -0.000025 | \n",
+ " 0.000013 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " plant | \n",
+ " | \n",
+ " RUS | \n",
+ " kilowatt hour | \n",
+ " use of net fresh water | \n",
+ " cubic meter | \n",
+ " 0.000120 | \n",
+ " 0.000024 | \n",
+ " 0.000018 | \n",
+ " 0.000017 | \n",
+ " 0.000007 | \n",
+ " 0.000004 | \n",
+ " 0.000000 | \n",
+ " -0.000025 | \n",
+ " 0.000015 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " plant | \n",
+ " | \n",
+ " CHN | \n",
+ " kilowatt hour | \n",
+ " use of net fresh water | \n",
+ " cubic meter | \n",
+ " 0.000120 | \n",
+ " 0.000024 | \n",
+ " 0.000018 | \n",
+ " 0.000017 | \n",
+ " 0.000007 | \n",
+ " 0.000004 | \n",
+ " 0.000000 | \n",
+ " -0.000025 | \n",
+ " 0.000014 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " plant | \n",
+ " | \n",
+ " SAF | \n",
+ " kilowatt hour | \n",
+ " use of net fresh water | \n",
+ " cubic meter | \n",
+ " 0.000119 | \n",
+ " 0.000024 | \n",
+ " 0.000017 | \n",
+ " 0.000017 | \n",
+ " 0.000007 | \n",
+ " 0.000004 | \n",
+ " 0.000000 | \n",
+ " -0.000024 | \n",
+ " 0.000016 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " plant | \n",
+ " | \n",
+ " RSAM | \n",
+ " kilowatt hour | \n",
+ " use of net fresh water | \n",
+ " cubic meter | \n",
+ " 0.000117 | \n",
+ " 0.000024 | \n",
+ " 0.000017 | \n",
+ " 0.000017 | \n",
+ " 0.000007 | \n",
+ " 0.000004 | \n",
+ " 0.000000 | \n",
+ " -0.000024 | \n",
+ " 0.000014 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " plant | \n",
+ " | \n",
+ " KOR | \n",
+ " kilowatt hour | \n",
+ " use of net fresh water | \n",
+ " cubic meter | \n",
+ " 0.000116 | \n",
+ " 0.000023 | \n",
+ " 0.000017 | \n",
+ " 0.000017 | \n",
+ " 0.000007 | \n",
+ " 0.000003 | \n",
+ " 0.000000 | \n",
+ " -0.000024 | \n",
+ " 0.000015 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " plant | \n",
+ " | \n",
+ " CEU | \n",
+ " kilowatt hour | \n",
+ " use of net fresh water | \n",
+ " cubic meter | \n",
+ " 0.000116 | \n",
+ " 0.000023 | \n",
+ " 0.000017 | \n",
+ " 0.000017 | \n",
+ " 0.000007 | \n",
+ " 0.000003 | \n",
+ " 0.000003 | \n",
+ " -0.000024 | \n",
+ " 0.000012 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " plant | \n",
+ " | \n",
+ " USA | \n",
+ " kilowatt hour | \n",
+ " use of net fresh water | \n",
+ " cubic meter | \n",
+ " 0.000115 | \n",
+ " 0.000023 | \n",
+ " 0.000017 | \n",
+ " 0.000017 | \n",
+ " 0.000007 | \n",
+ " 0.000003 | \n",
+ " 0.000000 | \n",
+ " -0.000024 | \n",
+ " 0.000014 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " plant | \n",
+ " | \n",
+ " BRA | \n",
+ " kilowatt hour | \n",
+ " use of net fresh water | \n",
+ " cubic meter | \n",
+ " 0.000104 | \n",
+ " 0.000025 | \n",
+ " 0.000002 | \n",
+ " 0.000017 | \n",
+ " 0.000007 | \n",
+ " 0.000004 | \n",
+ " 0.000000 | \n",
+ " -0.000024 | \n",
+ " 0.000015 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " activity product location unit method \\\n",
+ "8 plant UKR kilowatt hour use of net fresh water \n",
+ "7 plant TUR kilowatt hour use of net fresh water \n",
+ "5 plant RUS kilowatt hour use of net fresh water \n",
+ "2 plant CHN kilowatt hour use of net fresh water \n",
+ "6 plant SAF kilowatt hour use of net fresh water \n",
+ "4 plant RSAM kilowatt hour use of net fresh water \n",
+ "3 plant KOR kilowatt hour use of net fresh water \n",
+ "1 plant CEU kilowatt hour use of net fresh water \n",
+ "9 plant USA kilowatt hour use of net fresh water \n",
+ "0 plant BRA kilowatt hour use of net fresh water \n",
+ "\n",
+ " method unit total 6511: Road transport services of freight \\\n",
+ "8 cubic meter 0.000121 0.000024 \n",
+ "7 cubic meter 0.000121 0.000024 \n",
+ "5 cubic meter 0.000120 0.000024 \n",
+ "2 cubic meter 0.000120 0.000024 \n",
+ "6 cubic meter 0.000119 0.000024 \n",
+ "4 cubic meter 0.000117 0.000024 \n",
+ "3 cubic meter 0.000116 0.000023 \n",
+ "1 cubic meter 0.000116 0.000023 \n",
+ "9 cubic meter 0.000115 0.000023 \n",
+ "0 cubic meter 0.000104 0.000025 \n",
+ "\n",
+ " 15310: Natural sands \\\n",
+ "8 0.000018 \n",
+ "7 0.000018 \n",
+ "5 0.000018 \n",
+ "2 0.000018 \n",
+ "6 0.000017 \n",
+ "4 0.000017 \n",
+ "3 0.000017 \n",
+ "1 0.000017 \n",
+ "9 0.000017 \n",
+ "0 0.000002 \n",
+ "\n",
+ " 8614: Support and operation services to forestry and logging \\\n",
+ "8 0.000017 \n",
+ "7 0.000017 \n",
+ "5 0.000017 \n",
+ "2 0.000017 \n",
+ "6 0.000017 \n",
+ "4 0.000017 \n",
+ "3 0.000017 \n",
+ "1 0.000017 \n",
+ "9 0.000017 \n",
+ "0 0.000017 \n",
+ "\n",
+ " 88311: Wood manufacturing services \\\n",
+ "8 0.000007 \n",
+ "7 0.000007 \n",
+ "5 0.000007 \n",
+ "2 0.000007 \n",
+ "6 0.000007 \n",
+ "4 0.000007 \n",
+ "3 0.000007 \n",
+ "1 0.000007 \n",
+ "9 0.000007 \n",
+ "0 0.000007 \n",
+ "\n",
+ " 37510: Non-refractory mortars and concretes \\\n",
+ "8 0.000004 \n",
+ "7 0.000004 \n",
+ "5 0.000004 \n",
+ "2 0.000004 \n",
+ "6 0.000004 \n",
+ "4 0.000004 \n",
+ "3 0.000003 \n",
+ "1 0.000003 \n",
+ "9 0.000003 \n",
+ "0 0.000004 \n",
+ "\n",
+ " 12020: Natural gas, liquefied or in the gaseous state \\\n",
+ "8 0.000000 \n",
+ "7 0.000003 \n",
+ "5 0.000000 \n",
+ "2 0.000000 \n",
+ "6 0.000000 \n",
+ "4 0.000000 \n",
+ "3 0.000000 \n",
+ "1 0.000003 \n",
+ "9 0.000000 \n",
+ "0 0.000000 \n",
+ "\n",
+ " 15320: Pebbles, gravel, broken or crushed stone, macadam; granules, chippings and powder of stone \\\n",
+ "8 -0.000025 \n",
+ "7 -0.000025 \n",
+ "5 -0.000025 \n",
+ "2 -0.000025 \n",
+ "6 -0.000024 \n",
+ "4 -0.000024 \n",
+ "3 -0.000024 \n",
+ "1 -0.000024 \n",
+ "9 -0.000024 \n",
+ "0 -0.000024 \n",
+ "\n",
+ " other \n",
+ "8 0.000016 \n",
+ "7 0.000013 \n",
+ "5 0.000015 \n",
+ "2 0.000014 \n",
+ "6 0.000016 \n",
+ "4 0.000014 \n",
+ "3 0.000015 \n",
+ "1 0.000012 \n",
+ "9 0.000014 \n",
+ "0 0.000015 "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "scores_dict['Electricity']['lca_scores'].keys()"
+ "scores_dict['Electricity']['lca_scores']['Electricity_use_of_net_fresh_water']"
]
},
{
@@ -288,11 +645,11 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
- "def add_statistics(df, column_name='total'):\n",
+ "def add_statistics(df, column_name='total'): #call fctn data mainpulation?\n",
"\n",
" #Need a rank row to plot the total LCA scores in descending order (satter opepyxl function takes in non categorial values)\n",
" df['rank'] = df[column_name].rank(method=\"first\", ascending=\"False\")\n",
@@ -315,48 +672,250 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
- "def sector_lca_scores_to_excel(scores_dict, excel_file_name):\n",
+ "import re\n",
+ "import pandas as pd\n",
+ "\n",
+ "def clean_column_labels(df):\n",
+ "    \"\"\"Strip a leading '<digits>: ' prefix from every column label of df, in place, and return df.\"\"\"\n",
+ "    # A None label becomes the placeholder 'Unnamed'; any other label is passed through str() first.\n",
+ "    renamed = []\n",
+ "    for col in df.columns:\n",
+ "        if col is None:\n",
+ "            renamed.append('Unnamed')\n",
+ "        else:\n",
+ "            renamed.append(re.sub(r'^\\d+:\\s*', '', str(col)))\n",
+ "    df.columns = renamed\n",
+ "    return df\n",
+ "\n",
+ "# Example usage:\n",
+ "# df = pd.read_csv('your_file.csv') # or however you're loading your DataFrame\n",
+ "# cleaned_df = clean_column_labels(df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 193,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def add_sector_marker(df, sector):\n",
+ "    \"\"\"Return a new frame equal to df plus a 'sector' column placed right after 'product'.\n",
+ "\n",
+ "    The passed-in df is not modified; a 'sector' column it already has is replaced.\n",
+ "    Raises ValueError when df has no 'product' column.\n",
+ "    \"\"\"\n",
+ "    # drop() hands back a new frame, so the insert below never touches the caller's df.\n",
+ "    marked = df.drop(columns='sector', errors='ignore')\n",
+ "    product_index = list(marked.columns).index('product')\n",
+ "    marked.insert(product_index + 1, 'sector', str(sector))\n",
+ "    return marked"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 197,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import re\n",
+ "\n",
+ "def find_first_input_column(df):\n",
+ "    \"\"\"Return the 0-based position of the first column labelled '<digits>: ...', or None if there is none.\"\"\"\n",
+ "    pattern = r'^\\d+:\\s*'  # leading digits, a colon, optional whitespace\n",
+ "    for idx, column in enumerate(df.columns):\n",
+ "        # re.match() only accepts str/bytes, so labels of other types (e.g. int) are converted first.\n",
+ "        if column is not None and re.match(pattern, str(column)):\n",
+ "            return idx\n",
+ "    return None"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 198,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def sector_lca_scores_to_excel_and_column_positions(scores_dict, excel_file_name):\n",
+ " \"\"\" \n",
+ " What it does:\n",
+ " - Creates a dataframe for each method and sector from the lca scores dictionary\n",
+ " - Before storing each df in a worksheet in an excel file it:\n",
+ " - shortens the column labels of the input (removing cpc code)\n",
+ " - adds a sector name marker for keeping track in excel (when plotting can use it for labeling)\n",
+ " - adds statistics for plotting\n",
+ " - creates a dictionary which holds the indexes of the columns needed for plotting; this keeps the plotting dynamic, otherwise the column numbers would have to be hard-coded for openpyxl.\n",
+ " What it returns:\n",
+ " - Returns the index positions dictionary where the key is \"sector_method\"\n",
+ " - Creates excel file as defined by user\n",
+ " \"\"\"\n",
+ "\n",
" # Prepare to save each LCA score table to a different worksheet in the same Excel file\n",
" excel_file = excel_file_name\n",
+ " column_positions = {} #stores the indexes of columns for plotting\n",
" with pd.ExcelWriter(excel_file, engine='openpyxl') as writer:\n",
" for sector in scores_dict.keys():\n",
" lca_scores = scores_dict[sector]['lca_scores']\n",
" for method, table in lca_scores.items():\n",
" # Create a DataFrame for the current LCA score table\n",
" df = pd.DataFrame(table)\n",
- " # Add the index as a new column at the beginning\n",
- " df.insert(0, 'Index', df.index)\n",
- " # Add statistics to the DataFrame\n",
+ "\n",
+ " # Add sector marker\n",
+ " df = add_sector_marker(df, sector) #!! ADJUST POSITION\n",
+ "\n",
+ " # Add statistics to the DataFrame\n",
" df = add_statistics(df)\n",
- " # Add sector marker column\n",
- " df['sector']=str(sector) # potentially remove!\n",
- " # Reorder the columns to move 'sector' after 'product'\n",
- " columns = list(df.columns)\n",
- " product_index = columns.index('product')\n",
- " # Insert 'sector' after 'product'\n",
- " columns.insert(product_index + 1, columns.pop(columns.index('sector')))\n",
- " # Reassign the DataFrame with the new column order\n",
- " df = df[columns]\n",
+ "\n",
+ "                # Record the 0-based position of each plotting column that exists in this frame\n",
+ "                columns_of_interest = [\"total\", \"rank\", \"mean\", \"2std_abv\", \"2std_blw\", \"q1\", \"q3\", \"method\", \"method unit\"]\n",
+ "                positions = {col: df.columns.get_loc(col) for col in columns_of_interest if col in df.columns}\n",
+ "\n",
+ "                # Locate the first input column now, while its label still has the '<digits>: ' prefix\n",
+ "                # that find_first_input_column() looks for\n",
+ "                first_input_col_index = find_first_input_column(df)\n",
+ "                if first_input_col_index is not None:\n",
+ "                    positions[\"first_input\"] = first_input_col_index\n",
+ "\n",
+ "                # Store the positions for this method\n",
+ "                column_positions[method] = positions\n",
+ "\n",
+ " # remove cpc from input labels\n",
+ " df = clean_column_labels(df)\n",
"\n",
" # Generate a worksheet name\n",
" worksheet_name = f\"{method}\" #f\"{sector}_{method}\"\n",
" if len(worksheet_name) > 31:\n",
" worksheet_name = worksheet_name[:31]\n",
+ " \n",
" # Save the DataFrame to the Excel file in a new worksheet\n",
- " df.to_excel(writer, sheet_name=worksheet_name, index=False)"
+ " df.to_excel(writer, sheet_name=worksheet_name, index=False)\n",
+ " return column_positions"
]
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 199,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "14\n",
+ "14\n",
+ "15\n",
+ "16\n",
+ "15\n",
+ "14\n",
+ "14\n",
+ "14\n"
+ ]
+ }
+ ],
+ "source": [
+ "index_positions=sector_lca_scores_to_excel_and_column_positions(scores_dict, 'output_v22_3.xlsx') #the variable holds the index positions of the columns!!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'Cement_global_warming_potential_(gwp100)': {'total': 7,\n",
+ " 'rank': 8,\n",
+ " 'mean': 9,\n",
+ " '2std_abv': 10,\n",
+ " '2std_blw': 11,\n",
+ " 'q1': 12,\n",
+ " 'q3': 13,\n",
+ " 'method': 5,\n",
+ " 'method unit': 6,\n",
+ " 'first_input': 10},\n",
+ " 'Cement_cumulative_energy_demand_-_non-renewable_energy_resources': {'total': 7,\n",
+ " 'rank': 8,\n",
+ " 'mean': 9,\n",
+ " '2std_abv': 10,\n",
+ " '2std_blw': 11,\n",
+ " 'q1': 12,\n",
+ " 'q3': 13,\n",
+ " 'method': 5,\n",
+ " 'method unit': 6,\n",
+ " 'first_input': 10},\n",
+ " 'Cement_land_occupation': {'total': 7,\n",
+ " 'rank': 8,\n",
+ " 'mean': 9,\n",
+ " '2std_abv': 10,\n",
+ " '2std_blw': 11,\n",
+ " 'q1': 12,\n",
+ " 'q3': 13,\n",
+ " 'method': 5,\n",
+ " 'method unit': 6,\n",
+ " 'first_input': 10},\n",
+ " 'Cement_use_of_net_fresh_water': {'total': 7,\n",
+ " 'rank': 8,\n",
+ " 'mean': 9,\n",
+ " '2std_abv': 10,\n",
+ " '2std_blw': 11,\n",
+ " 'q1': 12,\n",
+ " 'q3': 13,\n",
+ " 'method': 5,\n",
+ " 'method unit': 6,\n",
+ " 'first_input': 10},\n",
+ " 'Electricity_global_warming_potential_(gwp100)': {'total': 7,\n",
+ " 'rank': 8,\n",
+ " 'mean': 9,\n",
+ " '2std_abv': 10,\n",
+ " '2std_blw': 11,\n",
+ " 'q1': 12,\n",
+ " 'q3': 13,\n",
+ " 'method': 5,\n",
+ " 'method unit': 6,\n",
+ " 'first_input': 10},\n",
+ " 'Electricity_cumulative_energy_demand_-_non-renewable_energy_resources': {'total': 7,\n",
+ " 'rank': 8,\n",
+ " 'mean': 9,\n",
+ " '2std_abv': 10,\n",
+ " '2std_blw': 11,\n",
+ " 'q1': 12,\n",
+ " 'q3': 13,\n",
+ " 'method': 5,\n",
+ " 'method unit': 6,\n",
+ " 'first_input': 10},\n",
+ " 'Electricity_land_occupation': {'total': 7,\n",
+ " 'rank': 8,\n",
+ " 'mean': 9,\n",
+ " '2std_abv': 10,\n",
+ " '2std_blw': 11,\n",
+ " 'q1': 12,\n",
+ " 'q3': 13,\n",
+ " 'method': 5,\n",
+ " 'method unit': 6,\n",
+ " 'first_input': 10},\n",
+ " 'Electricity_use_of_net_fresh_water': {'total': 7,\n",
+ " 'rank': 8,\n",
+ " 'mean': 9,\n",
+ " '2std_abv': 10,\n",
+ " '2std_blw': 11,\n",
+ " 'q1': 12,\n",
+ " 'q3': 13,\n",
+ " 'method': 5,\n",
+ " 'method unit': 6,\n",
+ " 'first_input': 10}}"
+ ]
+ },
+ "execution_count": 105,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "lca_scores_in_excel=sector_lca_scores_to_excel(scores_dict, 'output_v54_2.xlsx')"
+ "index_positions"
]
},
{
@@ -381,7 +940,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 200,
"metadata": {},
"outputs": [],
"source": [
@@ -410,33 +969,51 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 135,
"metadata": {},
"outputs": [],
"source": [
- "worksheet_dict=categorize_sheets_by_sector('output_v54_2.xlsx')"
+ "worksheet_dict=categorize_sheets_by_sector('output_v8_3.xlsx')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'total': 7, 'rank': 8, 'mean': 9, 'q1': 12, 'q3': 13}"
+ ]
+ },
+ "execution_count": 56,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "positions = index_positions.get(\"Cement_global_warming_potential_(gwp100)\", {})\n",
+ "positions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Function to plot categorical scatter plots (Note: Axis can't take on categorical labels)"
+ "Dot Plot"
]
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 220,
"metadata": {},
"outputs": [],
"source": [
- "from openpyxl.chart import ScatterChart, Reference, Series\n",
"from openpyxl import load_workbook\n",
- "from openpyxl.drawing.text import CharacterProperties\n",
- "from openpyxl.chart.label import DataLabelList\n",
- "\n",
+ "from openpyxl.chart import ScatterChart, Reference, Series\n",
"\n",
- "def dot_plots(filepath_workbook, worksheet_dict):\n",
+ "def dot_plots(filepath_workbook, worksheet_dict, index_positions):\n",
" \n",
" # Load the workbook\n",
" wb = load_workbook(filepath_workbook)\n",
@@ -445,54 +1022,74 @@
" for sector, worksheet_names in worksheet_dict.items():\n",
" \n",
" # Create or get the chart sheet for the current sector\n",
- " chart_sheet_name = f\"{sector}_dot_charts\"\n",
+ " chart_sheet_name = f\"{sector}_charts\"\n",
" if chart_sheet_name in wb.sheetnames:\n",
" ws_charts = wb[chart_sheet_name]\n",
" else:\n",
- " ws_charts = wb.create_sheet(chart_sheet_name)\n",
+ " ws_charts = wb.create_sheet(chart_sheet_name) \n",
" \n",
" # Initial position for the first chart\n",
" current_row = 1 # Start placing charts from row 1\n",
- " chart_height = 21 # Number of rows a chart occupies, adjust as needed\n",
+ " current_col = 1 # Start placing charts from column 1\n",
+ " chart_height = 30 # Number of rows a chart occupies\n",
+ " chart_width = 12 # Number of columns a chart occupies\n",
+ " charts_per_row = 3 # Number of charts per row\n",
" \n",
" # Iterate over each worksheet name in the current sector\n",
- " for worksheet_name in worksheet_names:\n",
+ " for i, worksheet_name in enumerate(worksheet_names):\n",
" ws = wb[worksheet_name]\n",
"\n",
" # Find min_row, max_row and max_column\n",
" max_row = ws.max_row\n",
- " print(max_row)\n",
" max_column = ws.max_column\n",
- " min_row=1\n",
- "\n",
- " #define columns\n",
- " total_col=9\n",
- " rank_col=10\n",
- " index_col=1\n",
- " mean_col=11\n",
- " std_adv_col=12\n",
- " std_blw_col=13\n",
- " q1_col=14\n",
- " q3_col=15\n",
+ " min_row = 1\n",
+ "\n",
+ " # Find the key in index_positions that contains worksheet_name\n",
+ " matching_key = None\n",
+ " for key in index_positions.keys():\n",
+ " if worksheet_name in key:\n",
+ " matching_key = key\n",
+ " break\n",
+ "\n",
+ " if not matching_key:\n",
+ " print(f\"Warning: No matching key found for worksheet '{worksheet_name}'. Skipping...\")\n",
+ " continue\n",
+ "\n",
+ " # Retrieve the column positions from the index_positions dictionary\n",
+ "            # Convert the stored 0-based positions to 1-based openpyxl columns; a missing entry stays None\n",
+ "            total_col = positions[\"total\"] + 1 if \"total\" in positions else None\n",
+ "            rank_col = positions[\"rank\"] + 1 if \"rank\" in positions else None\n",
+ "            mean_col = positions[\"mean\"] + 1 if \"mean\" in positions else None\n",
+ "            std_adv_col = positions[\"2std_abv\"] + 1 if \"2std_abv\" in positions else None\n",
+ "            std_blw_col = positions[\"2std_blw\"] + 1 if \"2std_blw\" in positions else None\n",
+ "            q1_col = positions[\"q1\"] + 1 if \"q1\" in positions else None\n",
+ "            q3_col = positions[\"q3\"] + 1 if \"q3\" in positions else None\n",
+ "            method_col = positions[\"method\"] + 1 if \"method\" in positions else None\n",
+ "            method_unit_col = positions[\"method unit\"] + 1 if \"method unit\" in positions else None\n",
+ " \n",
+ " # Ensure that all required columns are present\n",
+ " if None in [total_col, rank_col, mean_col, std_adv_col, std_blw_col, q1_col, q3_col, method_col, method_unit_col]:\n",
+ " print(f\"Warning: Missing columns in worksheet '{worksheet_name}' for sector '{sector}'. Skipping...\")\n",
+ " continue\n",
" \n",
" # Create a ScatterChart (or other chart type as needed)\n",
" chart = ScatterChart()\n",
"\n",
" # Chart titles\n",
- " chart.title = f\"{ws['G2'].value} LCA scores for {sector} sector\"\n",
- " chart.y_axis.title = f\"{ws['H2'].value}\"\n",
+ " method_value = ws.cell(row=2, column=method_col).value\n",
+ " chart.title = f\"{method_value} LCA scores for {sector} sector\" \n",
+ " \n",
+ " method_unit_value = ws.cell(row=2, column=method_unit_col).value\n",
+ " chart.y_axis.title = f\"{method_unit_value}\"\n",
" chart.x_axis.title = 'activity rank'\n",
" # Avoid overlap\n",
" chart.title.overlay = False\n",
" chart.x_axis.title.overlay = False\n",
" chart.y_axis.title.overlay = False \n",
"\n",
- "\n",
" # Define the data range for the chart\n",
" y_values = Reference(ws, min_col=total_col, min_row=min_row, max_row=max_row)\n",
- " print(y_values)\n",
" x_values = Reference(ws, min_col=rank_col, min_row=min_row, max_row=max_row)\n",
- " print(x_values)\n",
"\n",
" # Create a series and add it to the chart\n",
" series = Series(y_values, x_values, title_from_data=True)\n",
@@ -504,12 +1101,11 @@
" series.marker.size = 5\n",
" series.graphicalProperties.line.noFill = True\n",
"\n",
- "\n",
" # ADJUST X-AXIS\n",
" chart.x_axis.tickLblPos = \"low\"\n",
- " chart.x_axis.majorGridlines = None\n",
- " chart.x_axis.tickMarkSkip = 0 # Show all tick marks\n",
- " chart.x_axis.tickLblSkip = 0 # Show all labels\n",
+ " chart.x_axis.majorGridlines = None \n",
+ " chart.x_axis.tickMarkSkip = 1 # Show all tick marks, this adresses the tick lines \n",
+ " chart.x_axis.tickLblSkip = 1 # Show all labels, doesnt work\n",
"\n",
" chart.x_axis.scaling.orientation = \"minMax\"\n",
" chart.x_axis.crosses = \"autoZero\"\n",
@@ -517,49 +1113,27 @@
" chart.x_axis.delete = False\n",
"\n",
" # ADJUST Y-AXIS\n",
- "\n",
- " # Calculate the min and max y-values to determine the axis range\n",
- " y_values_list = [ws.cell(row=row, column=total_col).value for row in range(2,max_row+1)] #number of rows need to be defined based on dataframe\n",
- " min_y_value = min(y_values_list)-min(y_values_list)*0.1\n",
- " max_y_value = max(y_values_list)+ max(y_values_list)*0.1\n",
- "\n",
- " # Set y-axis range with some padding\n",
- " padding = (max_y_value - min_y_value) * 0.1 # 10% padding on each side\n",
- " chart.y_axis.scaling.min = min_y_value - padding\n",
- " chart.y_axis.scaling.max = max_y_value + padding\n",
- "\n",
- " # Explicitly set tick labels to be visible\n",
" chart.y_axis.tickLblPos = \"nextTo\" # Position the labels next to the tick marks\n",
- " #chart.y_axis.majorGridlines = True # Ensure major gridlines are enabled (though you mentioned they're visible)\n",
" chart.y_axis.delete = False # Ensure axis is not deleted\n",
- " # Format the y-axis to show 2 decimal places\n",
- " chart.y_axis.number_format = '0.000'\n",
+ " chart.y_axis.number_format = '0.00000'\n",
+ " chart.y_axis.majorGridlines = None \n",
"\n",
- "\n",
- " # ---------\n",
" # ADD STATS\n",
- " # ---------\n",
" # MEAN\n",
- " # Add a new series to the chart for the mean line\n",
" mean_y = Reference(ws, min_col=mean_col, min_row=min_row, max_row=max_row)\n",
" mean_series = Series(mean_y, x_values, title_from_data=\"True\")\n",
" chart.series.append(mean_series)\n",
- "\n",
- " # Customize the mean series to show as a line\n",
" mean_series.marker.symbol = \"none\" # No markers, just a line\n",
" mean_series.graphicalProperties.line.solidFill = \"FF0000\" # Red line for mean value\n",
" mean_series.graphicalProperties.line.width = 10000 # Set line width (default units are EMUs)\n",
"\n",
" # IQR\n",
- " # Add a new series to the chart for the iqr lines\n",
- " iqr1= Reference(ws, min_col=q1_col, min_row=min_row, max_row=max_row)\n",
- " iqr3= Reference(ws, min_col=q3_col, min_row=min_row, max_row=max_row)\n",
+ " iqr1 = Reference(ws, min_col=q1_col, min_row=min_row, max_row=max_row)\n",
+ " iqr3 = Reference(ws, min_col=q3_col, min_row=min_row, max_row=max_row)\n",
" iqr1_series = Series(iqr1, x_values, title_from_data=\"True\")\n",
" iqr3_series = Series(iqr3, x_values, title_from_data=\"True\")\n",
" chart.series.append(iqr1_series)\n",
" chart.series.append(iqr3_series)\n",
- "\n",
- " # Customize the iqr 1,3 series to show as a line\n",
" iqr1_series.marker.symbol = \"none\" # No markers, just a line\n",
" iqr3_series.marker.symbol = \"none\"\n",
" iqr1_series.graphicalProperties.line.solidFill = \"6082B6\" # Blue line \n",
@@ -568,15 +1142,12 @@
" iqr3_series.graphicalProperties.line.width = 10000 # Set line width (default units are EMUs)\n",
"\n",
" # STD\n",
- " # Add a new series to the chart for the std lines\n",
- " std_abv= Reference(ws, min_col=std_adv_col, min_row=min_row, max_row=max_row)\n",
- " std_blw= Reference(ws, min_col=std_blw_col, min_row=min_row, max_row=max_row)\n",
+ " std_abv = Reference(ws, min_col=std_adv_col, min_row=min_row, max_row=max_row)\n",
+ " std_blw = Reference(ws, min_col=std_blw_col, min_row=min_row, max_row=max_row)\n",
" std_abv_series = Series(std_abv, x_values, title_from_data=\"True\")\n",
" std_blw_series = Series(std_blw, x_values, title_from_data=\"True\")\n",
" chart.series.append(std_abv_series)\n",
" chart.series.append(std_blw_series)\n",
- "\n",
- " # Customize the iqr 1,3 series to show as a line\n",
" std_abv_series.marker.symbol = \"none\" # No markers, just a line\n",
" std_blw_series.marker.symbol = \"none\"\n",
" std_abv_series.graphicalProperties.line.solidFill = \"FFC300\" # yellow line\n",
@@ -586,55 +1157,28 @@
"\n",
" # Set legend position to the right of the plot area\n",
" chart.legend.position = 'r' # 'r' for right\n",
- " chart.legend.overlay= False\n",
+ " chart.legend.overlay = False\n",
"\n",
- " # Adjust chart dimensions and position to make space for the legend\n",
- " chart.width = 18 # Example width, adjust as needed\n",
- " chart.height = 10 # Example height, adjust as needed\n",
+ " # Adjust chart dimensions\n",
+ " chart.width = 20 # Width of the chart\n",
+ " chart.height = 14 # Height of the chart\n",
"\n",
- " # Add the chart to the chart worksheet\n",
" # Calculate the position for this chart\n",
- " position = f\"A{current_row}\"\n",
+ " position = ws_charts.cell(row=current_row, column=current_col).coordinate\n",
" ws_charts.add_chart(chart, position)\n",
" \n",
- " # Update current_row to position the next chart below the current one\n",
- " current_row += chart_height\n",
+ " # Update position for the next chart\n",
+ " current_col += chart_width +1 \n",
+ " if (i + 1) % charts_per_row == 0: # Move to the next row after placing `charts_per_row` charts\n",
+ " current_row += chart_height +1\n",
+ " current_col = 1 # Reset to the first column\n",
"\n",
- " wb.save(filepath_workbook)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 42,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "8\n",
- "'Cement_global_warming_potential'!$I$1:$I$8\n",
- "'Cement_global_warming_potential'!$J$1:$J$8\n",
- "8\n",
- "'Cement_cumulative_energy_demand'!$I$1:$I$8\n",
- "'Cement_cumulative_energy_demand'!$J$1:$J$8\n",
- "8\n",
- "'Cement_land_occupation'!$I$1:$I$8\n",
- "'Cement_land_occupation'!$J$1:$J$8\n",
- "11\n",
- "'Electricity_global_warming_pote'!$I$1:$I$11\n",
- "'Electricity_global_warming_pote'!$J$1:$J$11\n",
- "11\n",
- "'Electricity_cumulative_energy_d'!$I$1:$I$11\n",
- "'Electricity_cumulative_energy_d'!$J$1:$J$11\n",
- "11\n",
- "'Electricity_land_occupation'!$I$1:$I$11\n",
- "'Electricity_land_occupation'!$J$1:$J$11\n"
- ]
- }
- ],
- "source": [
- "dot_plots('output_v54_2.xlsx', worksheet_dict=worksheet_dict)"
+ " # Move the chart sheet to the first position\n",
+ " wb._sheets.remove(ws_charts)\n",
+ " wb._sheets.insert(0, ws_charts)\n",
+ "\n",
+ " wb.save(filepath_workbook)\n",
+ " return current_row"
]
},
{
@@ -646,14 +1190,14 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 216,
"metadata": {},
"outputs": [],
"source": [
"from openpyxl import load_workbook\n",
"from openpyxl.chart import BarChart, Reference\n",
"\n",
- "def stacked_bars(filepath_workbook, worksheet_dict):\n",
+ "def stacked_bars(filepath_workbook, worksheet_dict, current_row_dot_plot):\n",
" # Load the workbook\n",
" wb = load_workbook(filepath_workbook)\n",
" \n",
@@ -661,37 +1205,58 @@
" for sector, worksheet_names in worksheet_dict.items():\n",
" \n",
" # Create or get the chart sheet for the current sector\n",
- " chart_sheet_name = f\"{sector}_stacked_bars\"\n",
+ " chart_sheet_name = f\"{sector}_charts\"\n",
" if chart_sheet_name in wb.sheetnames:\n",
" ws_charts = wb[chart_sheet_name]\n",
" else:\n",
" ws_charts = wb.create_sheet(chart_sheet_name)\n",
" \n",
" # Initial position for the first chart\n",
- " current_row = 1 # Start placing charts from row 1\n",
+ " chart_height = 30 # Number of rows a chart occupies\n",
+ " chart_width = 12 # Number of columns a chart occupies\n",
+ " current_row = current_row_dot_plot + chart_height # Start placing charts from the row where the dot plots left off\n",
" current_col = 1 # Start placing charts from column 1\n",
- " chart_height = 18 # Number of rows a chart occupies\n",
- " chart_width = 15 # Number of columns a chart occupies\n",
" charts_per_row = 3 # Number of charts per row\n",
" \n",
" # Iterate over each worksheet name in the current sector\n",
" for i, worksheet_name in enumerate(worksheet_names):\n",
" ws = wb[worksheet_name]\n",
"\n",
+ " # Find the key in index_positions that contains worksheet_name\n",
+ " matching_key = None\n",
+ " for key in index_positions.keys():\n",
+ " if worksheet_name in key:\n",
+ " matching_key = key\n",
+ " break\n",
+ "\n",
+ " if not matching_key:\n",
+ " print(f\"Warning: No matching key found for worksheet '{worksheet_name}'. Skipping...\")\n",
+ " continue\n",
+ "\n",
+ " # Retrieve the column positions from the index_positions dictionary\n",
+ " positions = index_positions[matching_key]\n",
+ "\n",
" # Find min_row, max_row and max_column\n",
" max_row = ws.max_row\n",
" max_column = ws.max_column\n",
- " input_min_col = 16\n",
+ " input_min_col = positions.get(\"first_input\", None) + 1\n",
+ " rank_col = positions.get(\"rank\", None) + 1\n",
+ " method_col = positions.get(\"method\", None) + 1\n",
+ " method_unit_col = positions.get(\"method unit\", None) + 1\n",
"\n",
" chart = BarChart()\n",
- " chart.type = \"col\"\n",
+ " chart.type = \"bar\"\n",
" chart.style = 2\n",
" chart.grouping = \"stacked\"\n",
" chart.overlap = 100\n",
"\n",
" # Chart titles\n",
- " chart.title = f\"{ws['G2'].value} input contributions to LCA scores for {sector} sector\"\n",
- " chart.y_axis.title = f\"{ws['H2'].value}\"\n",
+ " method_value = ws.cell(row=2, column=method_col).value\n",
+ " chart.title = f\"{sector} sector inputs contributions to {method_value}\"\n",
+ "\n",
+ " method_unit_value = ws.cell(row=2, column=method_unit_col).value\n",
+ " chart.y_axis.title = f\"{method_unit_value}\"\n",
+ " \n",
" chart.x_axis.title = 'activity index'\n",
"\n",
" # Avoid overlap\n",
@@ -702,7 +1267,7 @@
"\n",
" # Define data\n",
" data = Reference(ws, min_col=input_min_col, min_row=1, max_row=max_row, max_col=max_column)\n",
- " cats = Reference(ws, min_col=1, min_row=2, max_row=max_row)\n",
+ " cats = Reference(ws, min_col=rank_col, min_row=2, max_row=max_row)\n",
"\n",
" chart.add_data(data, titles_from_data=True)\n",
" chart.set_categories(cats)\n",
@@ -717,13 +1282,9 @@
" chart.y_axis.delete = False # Ensure axis is not deleted\n",
" chart.y_axis.number_format = '0.000'\n",
"\n",
- " # Set legend position to the right of the plot area\n",
- " chart.legend.position = 'r' # 'r' for right\n",
- " chart.legend.overlay = False\n",
- "\n",
" # Adjust chart dimensions\n",
- " chart.width = 15 # Example width, adjust as needed\n",
- " chart.height = 25 # Example height, adjust as needed\n",
+ " chart.width = 20 # Width of the chart\n",
+ " chart.height = 14 # Height of the chart\n",
"\n",
" # Add the chart to the chart worksheet\n",
" # Calculate the position for this chart\n",
@@ -731,21 +1292,43 @@
" ws_charts.add_chart(chart, position)\n",
" \n",
" # Update position for the next chart\n",
- " current_col += chart_width -4\n",
+ " current_col += chart_width +1\n",
" if (i + 1) % charts_per_row == 0: # Move to the next row after placing `charts_per_row` charts\n",
- " current_row += chart_height +2\n",
+ " current_row += chart_height +1\n",
" current_col = 1 # Reset to the first column\n",
+ "\n",
+ " # Move the chart sheet to the first position\n",
+ " wb._sheets.remove(ws_charts)\n",
+ " wb._sheets.insert(0, ws_charts)\n",
" \n",
" wb.save(filepath_workbook)\n"
]
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 221,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "14\n",
+ "14\n",
+ "15\n",
+ "16\n",
+ "15\n",
+ "14\n",
+ "14\n",
+ "14\n"
+ ]
+ }
+ ],
"source": [
- "stacked_bars('output_v54_2.xlsx', worksheet_dict=worksheet_dict)"
+ "index_positions=sector_lca_scores_to_excel_and_column_positions(scores_dict, 'output_v26_3.xlsx') # index_positions holds the index positions of the columns\n",
+ "worksheet_dict=categorize_sheets_by_sector('output_v26_3.xlsx')\n",
+ "current_row_dots = dot_plots('output_v26_3.xlsx', worksheet_dict=worksheet_dict, index_positions=index_positions)\n",
+ "stacked_bars('output_v26_3.xlsx', worksheet_dict=worksheet_dict, current_row_dot_plot=current_row_dots)\n"
]
},
{