First version of the Wings sitemap generator

This commit is contained in:
Jelle Hermsen 2021-01-14 15:31:08 +01:00
parent 1e5cbf0848
commit f71f99b93f
5 changed files with 180 additions and 0 deletions

12
Pipfile Normal file
View File

@ -0,0 +1,12 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
xlsxwriter = "*"
[dev-packages]
[requires]
python_version = "3.7"

29
Pipfile.lock generated Normal file
View File

@ -0,0 +1,29 @@
{
"_meta": {
"hash": {
"sha256": "b67ff932c90a9d172a88b0e0f9e2c8af540679673ec115cdc823c6c117a300cb"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.7"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"xlsxwriter": {
"hashes": [
"sha256:9b1ade2d1ba5d9b40a6d1de1d55ded4394ab8002718092ae80a08532c2add2e6",
"sha256:b807c2d3e379bf6a925f472955beef3e07495c1bac708640696876e68675b49b"
],
"index": "pypi",
"version": "==1.3.7"
}
},
"develop": {}
}

23
README.md Normal file
View File

@ -0,0 +1,23 @@
Bij1 Wings sitemap generator
============================
To get this up and running you'll need to have
[Pipenv](https://pipenv.readthedocs.io/en/latest/) and
[Pyenv](https://github.com/pyenv/pyenv) running on your machine.
Pipenv takes care of dependency management and the creation of virtual envs.
Pyenv takes care of handling the various versions of Python. They both work
nicely together, so whenever we want to upgrade to a newer Python version,
Pipenv can automatically set up a new Python version from Pyenv.
To set up this project:
- clone this repository wherever you want
- move to the root directory of this repository
- run `pipenv sync -d` to install all the dependencies defined in Pipfile.lock
(the -d is added to make sure you also install development libraries if there
are any)
- you will need a json file with a complete content dump from Wings. This
should be a direct dump from the Wings API (check download.sh.example for how
to download it using Curl and GraphQL. You'll need to replace APP_KEY and
WINGS_PROJECT)
- now you can start the sitemap generator with
`pipenv run python sitemap.py [YOUR_JSON_FILENAME] [OUTPUT_XLSX_FILENAME]`

1
download.sh.example Normal file
View File

@ -0,0 +1 @@
curl 'https://api.wings.dev' -H 'Accept-Encoding: gzip, deflate, br' -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'Connection: keep-alive' -H 'Authorization: Bearer APP_KEY' -H 'X-Wings-Project: WINGS_PROJECT' --data-binary '{"query":"fragment NodeFields on Node {\n id\n title\n resourceType\n slug\n featured {\n title\n description\n image {\n url\n }\n }\n locale {\n id\n name\n primary\n }\n image {\n id\n name\n caption\n alt\n key\n url\n }\n meta {\n key\n value\n }\n data {\n key\n data\n }\n menu {\n id\n name\n items {\n text\n url\n items {\n text\n url\n }\n }\n }\n status\n nodeType\n platforms {\n search {\n title\n description\n }\n facebook {\n title\n description\n image {\n url\n }\n }\n twitter {\n title\n description\n image {\n url\n }\n }\n whatsapp {\n text\n }\n meta {\n tag\n attributes {\n key\n value\n }\n }\n }\n}\n\nfragment CampaignFields on Campaign {\n intro\n description\n submissionSchema\n settings {\n legal {\n terms {\n url\n }\n privacyPolicy {\n url\n }\n }\n }\n}\n\n{\n articles: entries(selector: { typeId: { eq: \"article\" } }, first: 0) {\n edges {\n node {\n ...NodeFields\n content\n type {\n id\n }\n }\n }\n }\n pages: entries(selector: { typeId: { eq: \"page\" } }, first: 0) {\n edges {\n node {\n ...NodeFields\n content\n type {\n id\n }\n }\n }\n }\n events(first: 0) {\n edges {\n node {\n ...NodeFields\n ...CampaignFields\n schedule {\n start\n end\n }\n location {\n name\n street\n city\n zip\n country\n }\n fee {\n amount {\n amount\n currency {\n id\n name\n symbol\n }\n }\n }\n attendeeCount\n }\n }\n }\n signups(first: 0) {\n edges {\n node {\n ...NodeFields\n ...CampaignFields\n }\n }\n }\n petitions(first: 0) {\n edges {\n node {\n ...NodeFields\n ...CampaignFields\n signatureCount\n signatureGoal\n }\n }\n }\n fundraisers(first: 0) {\n edges {\n node {\n ...NodeFields\n ...CampaignFields\n target {\n amount\n currency {\n id\n name\n symbol\n }\n }\n amounts {\n options {\n amount {\n 
amount\n currency {\n id\n name\n symbol\n }\n }\n }\n }\n raised {\n amount\n currency {\n id\n name\n symbol\n }\n }\n paymentMethods {\n id\n title\n icons {\n url\n }\n }\n }\n }\n }\n currentApp {\n ... on WebApp {\n home {\n node {\n id\n }\n }\n menu {\n id\n name\n items {\n text\n url\n items {\n text\n url\n }\n }\n }\n }\n }\n }\n","variables":{}}' --compressed | python -m json.tool > content.json

115
sitemap.py Executable file
View File

@ -0,0 +1,115 @@
#!/usr/bin/env python3
# ____ ___ _ _
# | __ )_ _| | / |
# | _ \| |_ | | |
# | |_) | | |_| | |
# |____/___\___/|_|
#
# Bij1 Wings Sitemap Generator
from os.path import isfile
import json
import sys
import xlsxwriter
# ----------------------------
# Read arguments and load json
# ----------------------------
# Usage: sitemap.py JSON_FILENAME OUTPUT_XLSX_FILENAME
if len(sys.argv) < 3:
    # sys.exit() with a string prints the message to stderr and exits with
    # status 1, so shells and CI jobs can detect the failure. quit() is a
    # helper meant for the interactive interpreter and exits with status 0.
    sys.exit(
        'Please supply a json-file and an output xlsx filename as arguments.'
    )

json_filename = sys.argv[1]
output_filename = sys.argv[2]

if not isfile(json_filename):
    sys.exit('Json file could not be found')

# Decode explicitly as UTF-8 instead of relying on the platform's default
# encoding, which differs between operating systems.
with open(json_filename, "r", encoding="utf-8") as content_raw:
    content = json.load(content_raw)
# ----------------------------
# Retrieve data from json
# ----------------------------
# Maps every category that ends up in the sitemap to the node field that is
# copied into the 'mobiledoc' entry: pages and articles use 'content', the
# other categories use 'description'. The order of this table is the order
# in which the categories appear in ordered_data.
body_field_per_category = {
    'articles': 'content',
    'pages': 'content',
    'fundraisers': 'description',
    'signups': 'description',
    'petitions': 'description',
}

# For each category, reduce every node found under
# content['data'][category]['edges'] to the handful of fields used further on.
ordered_data = {
    category: [
        {
            'slug': entry['slug'],
            'title': entry['title'],
            'status': entry['status'],
            'mobiledoc': entry[body_field],
            'language': entry['locale']['name'],
        }
        for entry in (
            edge['node'] for edge in content['data'][category]['edges']
        )
    ]
    for category, body_field in body_field_per_category.items()
}
# ----------------------------
# Generate xlsx file
# ----------------------------
# Columns of the sheet, in the order in which they are written.
column_names = [
    'url', 'type', 'title', 'slug', 'language', 'status'
]
# Width applied to each column via worksheet.set_column().
column_widths = {
    'url': 50,
    'type': 10,
    'title': 40,
    'slug': 20,
    'language': 20,
    'status': 20,
}

# Using the workbook as a context manager guarantees that close() is called,
# also when an exception interrupts the code below.
with xlsxwriter.Workbook(output_filename) as workbook:
    worksheet = workbook.add_worksheet()

    header_format = workbook.add_format({
        'bold': True,
        'align': 'center',
        'valign': 'vcenter',
        'fg_color': '#D7E4BC',
        'border': 1
    })
    worksheet.set_row(0, None, header_format)

    # Set up header row
    for col, column_name in enumerate(column_names):
        worksheet.write(0, col, column_name)
        worksheet.set_column(col, col, column_widths[column_name])

    # One row per node, starting directly below the header row.
    row = 0
    for cat, nodes in ordered_data.items():
        # Pages are linked directly under the site root; every other
        # category gets its own name as a path prefix.
        url_prefix = 'https://www.bij1.org/'
        if cat != 'pages':
            url_prefix += cat + '/'

        for node in nodes:
            row += 1
            # Values are listed in the same order as column_names.
            worksheet.write_row(row, 0, [
                url_prefix + node['slug'],
                cat,
                node['title'],
                node['slug'],
                node['language'],
                node['status'],
            ])