From b25949994d9e918c49671a575c4be7815ce23180 Mon Sep 17 00:00:00 2001 From: Samapriya Roy Date: Thu, 28 Sep 2023 21:08:12 -0500 Subject: [PATCH] Update and rename data_parse.yml to deploy_parse.yml Added deploy and parse to same page - Made markdown copy function - Created dependency --- .github/workflows/data_parse.yml | 86 ------------------------------ .github/workflows/deploy_parse.yml | 43 +++++++++++++++ 2 files changed, 43 insertions(+), 86 deletions(-) delete mode 100644 .github/workflows/data_parse.yml create mode 100644 .github/workflows/deploy_parse.yml diff --git a/.github/workflows/data_parse.yml b/.github/workflows/data_parse.yml deleted file mode 100644 index 7bae066..0000000 --- a/.github/workflows/data_parse.yml +++ /dev/null @@ -1,86 +0,0 @@ -name: manuscript_export -on: - push: - branches: - - main - -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: "3.8" - - - name: upgrade pip and install python packages - run: | - python -m pip install --upgrade pip - pip install -U pip setuptools - pip install requests - pip install beautifulsoup4 - pip install pandas - pip install lxml - pip install html5lib - - name: output_render - uses: jannekem/run-python-script-action@v1 - with: - script: | - import json - from datetime import datetime - import pandas as pd - import requests - from bs4 import BeautifulSoup - today = str(datetime.today().strftime("%Y-%m-%d")) - - def readme_parse(): - # use pandas to convert url to list - url = "https://github.com/ladiesoflandsat/LOLManuscriptMonday/blob/main/README.md" - url_info = pd.read_html(url) - # convert to dataframe - df = url_info[0] - response = requests.get(url) - soup = BeautifulSoup(response.text, "html.parser") - table = soup.find("table") - - links = [] - for tr in table.findAll("tr"): - trs = tr.findAll("td") - for each in trs: - try: - link = each.find("a")["href"] - links.append(link) - except Exception as error: - pass - - length_links = len(links) // 3 - result = [links[i::3] for i in range(length_links)] - - columns_url = ["link_article", "link_handle", "link_tweet"] - df_links = pd.DataFrame(columns=columns_url) - for i in range(length_links): - df_links.loc[i] = result[0][i], result[1][i], result[2][i] - df = pd.concat([df, df_links], axis=1) - for link_col in columns_url: - df[link_col] = df[link_col].map(lambda short_link: f"{short_link}") - - df.to_csv(f"LOLMonday_latest.csv", index=False, encoding="utf-8-sig") - df2 = df.to_json(orient="records", indent=2) - print(json.dumps(json.loads(df2), indent=2)) - with open(f"LOLMonday_latest.json", "w") as f: - f.write(json.dumps(json.loads(df2), indent=2)) - readme_parse() - - name: commit files - continue-on-error: true - run: | - today=$(date +"%Y-%m-%d") - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" - git add -A - git commit -m "updated datasets ${today} UTC" -a - - name: push changes - continue-on-error: true - uses: ad-m/github-push-action@v0.6.0 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - branch: main diff --git a/.github/workflows/deploy_parse.yml b/.github/workflows/deploy_parse.yml new file mode 100644 index 0000000..93b3785 --- /dev/null +++ b/.github/workflows/deploy_parse.yml @@ -0,0 +1,43 @@ +name: manuscript_export +on: + push: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.8" + + - name: upgrade pip and install python packages + run: | + python -m pip install --upgrade pip + pip install -U pip setuptools + pip install requests + pip install beautifulsoup4 + pip install pandas + pip install lxml + pip install html5lib + - name: output_render + uses: jannekem/run-python-script-action@v1 + with: + script: | + + - name: commit files + continue-on-error: true + run: | + today=$(date +"%Y-%m-%d") + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git add -A + git commit -m "updated datasets ${today} UTC" -a + - name: push changes + continue-on-error: true + uses: ad-m/github-push-action@v0.6.0 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: main