Commit 9586a453 authored by Dimitri Podborski's avatar Dimitri Podborski
Browse files

Implement the first version of the Systems.py

close #176 #178 #179 #190 #177 #174
parent b2bc6cc5
......@@ -2,6 +2,40 @@
# Generic
####################################
*.html
data/
*.xlsx
*.csv
*.docx
####################################
# macOS
####################################
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
####################################
# Visual Studio Code
......
# Automation tools for MPEG workflow
MDMS and GitLab modules are currently under development. An example of how to use them can be found in `./exampleN.py`.
You can use MDMS and GitLab modules in your own scripts. An example of how to use them can be found in `./example_{mdms,gitlab}.py`.
A **very quick hack** for automatic issue creation (used for FF group during last meeting) can be found in `./hack.py`.
A simple script to generate issues for each ballot comment from a document received from ISO.
`./generate_ballot_issues.py`.
New ideas are welcome. Open new issues for your ideas.
The general idea is to fire requests to MDMS, process the information, use gitlab API to open issues.
New ideas are welcome. Open new issues for your ideas. The general idea is to fire requests to MDMS, process the information, use gitlab API to open issues.
## Requirements
......@@ -19,15 +12,49 @@ The general idea is to fire requests to MDMS, process the information, use gitla
- `MPEG_PWD` - [MDMS](https://dms.mpeg.expert) password
- `GITLAB_TOKEN` - your private GitLab token. You can create your token [here](http://mpegx.int-evry.fr/software/profile/personal_access_tokens).
## Tools
### Python3
- python3
- ...
It is recommended to use python [virtual environment](https://docs.python.org/3/library/venv.html#module-venv).
Run the following commands to set everything up:
### Python packages / libs
```shell
git clone http://mpegx.int-evry.fr/software/podborski/AutomationTools.git
cd AutomationTools
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
Set up the required python modules using
(run your scripts)
```shell
python3 -m pip install -r requirements.txt
deactivate
```
## systems.py
`systems.py` is a script which is intended to be used in the Systems group. It takes a CSV file as an input and allows you to:
1. Print information about input documents on MDMS and GitLab (optional):
e.g.: `python systems.py -m m55958,m55959,m56121 -p FileFormat/CENC`
2. Open issues based on the information provided in a CSV file:
e.g.: `python systems.py --csv Contribs.csv -i`
3. Generate an output document based on the information provided in a CSV file. Use a template (optional) as a basis:
e.g.: `python systems.py --csv Contribs.csv -o --template templates/WG03_input_template_dimitri.docx`
The CSV file must have a header row with the following entries:
- `Number` - MPEG document number e.g. m12345
- To determine which GitLab project needs to be used make sure that your CSV file has either:
- `Project URL`
- or `Sub Group` with `Project Name`
The example below has both `Project URL` and (`Sub Group` with `Project Name`) but you can also have one of these in your CSV. The CSV delimiter is determined automatically.
```csv
Number;Title;Project URL;Sub Group;Project Name
m55958;On item encryption;http://mpegx.int-evry.fr/software/MPEG/Systems/FileFormat/CENC;FileFormat;CENC
m55959;On multi-key encryption;http://mpegx.int-evry.fr/software/MPEG/Systems/FileFormat/CENC;FileFormat;CENC
```
## generate_ballot_issues.py
A simple script to generate issues for each ballot comment from a document received from ISO.
# -*- coding: utf-8 -*-
'''
This is the interface to MPEG GitLab API.
'''
import os
import gitlab
from enum import Enum, unique
BASE_URL = 'http://mpegx.int-evry.fr/software'
TOKEN = os.environ.get('GITLAB_TOKEN')
# TODO: implement me
\ No newline at end of file
@unique
class Label(Enum):
    '''
    Closed set of label names known to this GitLab interface.

    Every member's value is the same string as its name; @unique guarantees
    that no two members share a value.
    NOTE(review): presumably these are the issue labels configured in the
    MPEG GitLab projects - confirm against the project settings.
    '''
    Accepted = 'Accepted'
    BallotComment = 'BallotComment'
    Combined = 'Combined'
    DocAvailable = 'DocAvailable'
    Editorial = 'Editorial'
    Late = 'Late'
    NeedsRevision = 'NeedsRevision'
    Noted = 'Noted'
    Postponed = 'Postponed'
    ProbableAgreement = 'ProbableAgreement'
    Rejected = 'Rejected'
    Revised = 'Revised'
    SeeDoCR = 'SeeDoCR'
    Withdrawn = 'Withdrawn'
# private token authentication
# This runs at import time: the client is created from BASE_URL and TOKEN and
# the token is verified once with auth().
GL = gitlab.Gitlab(BASE_URL, private_token=TOKEN)
try:
    GL.auth()
    print('GitLab API: Authenticated as "{}"'.format(GL.user.username))
except gitlab.exceptions.GitlabAuthenticationError:
    print('Error: Could not authenticate. Please set the valid private GITLAB_TOKEN env. variable.')
    # A None client is the "not authenticated" marker that the functions in
    # this module test with `if not GL`.
    GL = None
def _get_project(project_id):
    '''
    Look up one GitLab project by its id.

    Returns the project object. Returns None (after printing a message) when
    the module-level client is not authenticated or the lookup fails.
    '''
    if not GL:
        print('Error: GitLab API authentication failed.')
        return None
    try:
        return GL.projects.get(project_id)
    except gitlab.exceptions.GitlabGetError as err:
        print('project_id', project_id, err)
    return None
# --------------------------------------------------------------------------------------------------
# Interfaces
# --------------------------------------------------------------------------------------------------
def get_projects():
    '''
    List all projects visible to the authenticated user.

    Returns a list of plain dicts with the keys 'id', 'name', 'url',
    'path_with_namespace' and 'description'. Returns an empty list (after
    printing a message) when the client is not authenticated.
    '''
    if not GL:
        print('Error: GitLab API authentication failed.')
        return []
    return [
        {
            'id': entry.id,
            'name': entry.name,
            'url': entry.web_url,
            'path_with_namespace': entry.path_with_namespace,
            'description': entry.description
        }
        for entry in GL.projects.list(all=True)
    ]
def get_members(group_id):
    '''
    Collect the members of every direct subgroup of a GitLab group.

    group_id is passed to the GitLab API to fetch the parent group.
    Returns a dict keyed by username; each value is a dict with the keys
    'id', 'name' and 'url'. A user found in several subgroups is reported
    once (the first occurrence wins). Returns an empty dict (after printing
    a message) when the client is not authenticated.
    '''
    if not GL:
        print('Error: GitLab API authentication failed.')
        # Same type as the regular return value, so callers can treat both alike.
        return {}
    group = GL.groups.get(group_id)
    members_stripped = {}
    # all=True is required: list() only returns the first page by default,
    # which would silently drop subgroups.
    for subgroup in group.subgroups.list(all=True):
        # The subgroup listing yields reduced objects; a lazy group object is
        # enough to reach the members manager without another GET.
        real_group = GL.groups.get(subgroup.id, lazy=True)
        for member in real_group.members.all(all=True):
            if member.username not in members_stripped:
                members_stripped[member.username] = {
                    'id': member.id,
                    'name': member.name,
                    'url': member.web_url
                }
    return members_stripped
def get_issues(project_id):
    '''
    Return all open issues of a project.

    Returns an empty list when the project cannot be fetched.
    '''
    project = _get_project(project_id)
    if not project:
        return []
    # all=True: without it only the first page of results is returned, so
    # issue lookups would miss issues in larger projects.
    return project.issues.list(state='opened', all=True)
def open_issue(project_id, title, description, labels=None):
    '''
    Create a new issue in a project.

    labels is optional; when omitted the issue is created without labels.
    Nothing is created when the project cannot be fetched.
    '''
    project = _get_project(project_id)
    if not project:
        return
    # None instead of a shared mutable default list.
    issue_labels = labels if labels is not None else []
    # create() already sends the request to the server; a following save()
    # has no local changes to push, so it is not needed.
    project.issues.create({'title': title, 'description': description, 'labels': issue_labels})
# -*- coding: utf-8 -*-
'''
Some helper functions
'''
import json
import os
from datetime import datetime, timedelta
from docx import Document, opc, oxml, shared
from docx.enum.text import WD_ALIGN_PARAGRAPH # pylint: disable=E0611
OPENING_TAG = '[//]: # ( !!! ATTENTION !!! DO NOT MODIFY BEFORE AND AFTER THIS LINE)'
CLOSING_TAG = '[//]: # ( !!! ATTENTION !!! YOU CAN MODIFY AFTER THIS LINE)'
class DocumentFormatter:
    '''
    Builds the output .docx document on top of a python-docx Document.
    '''

    def __init__(self, template_path):
        '''
        Open the document at template_path as the basis for the output.
        '''
        self.__doc = Document(docx=template_path)

    def save(self, output_path):
        '''
        Write the document to output_path.
        '''
        self.__doc.save(output_path)

    def add_project(self, project):
        '''
        Append a level 2 heading with the project name, a paragraph with its
        description and a paragraph linking to the GitLab project.

        project is a dict with the keys 'description', 'url' and 'name'.
        '''
        description = project['description']
        url = project['url']
        name = project['name']
        self.__doc.add_heading(name, 2)
        self.__doc.add_paragraph(description)
        link_paragraph = self.__doc.add_paragraph('GitLab project ')
        self.add_hyperlink(link_paragraph, url, 'link')

    # https://github.com/python-openxml/python-docx/issues/74#issuecomment-261169410
    def add_hyperlink(self, paragraph, url, text):
        '''
        Append an external hyperlink (coloured and underlined) to a paragraph.
        '''
        qn = oxml.shared.qn
        new_element = oxml.shared.OxmlElement

        # Register the URL as an external relationship of the document part.
        r_id = paragraph.part.relate_to(url, opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True)
        hyperlink = new_element('w:hyperlink')
        hyperlink.set(qn('r:id'), r_id)

        # Run properties: link colour first, then single underline.
        run_properties = new_element('w:rPr')
        for tag, value in (('w:color', '0000EE'), ('w:u', 'single')):
            prop = new_element(tag)
            prop.set(qn('w:val'), value)
            run_properties.append(prop)

        run = new_element('w:r')
        run.append(run_properties)
        run.text = text
        hyperlink.append(run)
        paragraph._p.append(hyperlink)

    def add_contribution(self, contribution):
        '''
        Append one contribution: a level 3 heading followed by a 4x2 table
        with the rows Authors, Abstract, Discussions and Disposition.

        contribution is a dict with the keys 'document', 'details',
        'issue_meta' and 'issue_title'; the two issue entries may be None.
        '''
        document = contribution['document']
        details = contribution['details']
        issue_meta = contribution['issue_meta']
        issue_title = contribution['issue_title']

        # Heading: document number (linked to its container) followed by the title.
        heading = self.__doc.add_heading('', 3)
        self.add_hyperlink(heading, document['container'], document['document'])
        heading.add_run(' ' + document['title'])

        # 4x2 table with all borders.
        table = self.__doc.add_table(rows=4, cols=2)
        table.style = 'Table Grid'
        rows = table.rows

        # Left column: row captions.
        for row, caption in zip(rows, ('Authors', 'Abstract', 'Discussions', 'Disposition')):
            row.cells[0].text = caption

        # Right column: authors and abstract.
        authors_cell = rows[0].cells[1]
        authors_cell.text = details['authors_string']
        authors_cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.LEFT
        if details['abstract'] is not None:
            rows[1].cells[1].text = details['abstract']

        # Right column: issue links. The first link goes into the cell's
        # existing paragraph, every further one into a new paragraph.
        discussion_cell = rows[2].cells[1]
        links_added = 0
        for issue in (issue_meta, issue_title):
            if issue is None:
                continue
            if links_added == 0:
                target = discussion_cell.paragraphs[0]
            else:
                target = discussion_cell.add_paragraph()
            self.add_hyperlink(target, issue.web_url, issue.references['full'])
            links_added += 1

        # Set column widths
        for column, width_cm in zip(table.columns, (2, 14)):
            for cell in column.cells:
                cell.width = shared.Cm(width_cm)
def is_document_late(meeting_start, v1_upload_timestamp):
    '''
    Tell whether the first version of a document was uploaded too late.

    meeting_start and v1_upload_timestamp shall be datetime objects.
    The deadline is midnight at the start of the meeting's first day minus
    four days (noted by the original author as "end of Wednesday", which
    presumably assumes a Monday start). An upload exactly at the deadline
    counts as late.
    '''
    # Truncate to midnight; microseconds must be cleared as well, otherwise
    # the deadline is shifted by a fraction of a second.
    first_day = meeting_start.replace(hour=0, minute=0, second=0, microsecond=0)
    deadline = first_day - timedelta(days=4)
    return v1_upload_timestamp >= deadline
def try_parsing_date(text):
    '''
    Try parsing the timestamp, if not possible return None.

    Accepted formats are 'YYYY-MM-DD HH:MM:SS' and 'YYYY-MM-DD'; surrounding
    whitespace is ignored.
    '''
    cleaned = text.strip()
    # The date-only format needs the leading '%' on the year directive,
    # otherwise it never matches.
    for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d'):
        try:
            return datetime.strptime(cleaned, fmt)
        except ValueError:
            continue
    return None
def load_json_data(json_path):
    '''
    Load json file from json_path and return the data.

    The file is read as UTF-8, matching how store_json_data writes it.
    '''
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)
def store_json_data(json_path, data):
    '''
    Store data as a json file to json_path. datetime objects are stored as strings.

    Missing parent directories are created. Any value the json module cannot
    serialise natively is written as str(value).
    '''
    dir_name = os.path.dirname(json_path)
    if dir_name:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(dir_name, exist_ok=True)
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2, default=str)
def find_meeting(meetings, meeting_number):
    '''
    Find and return a meeting using the meeting_number. If meeting_number < 0 return the latest meeting.

    meetings is an iterable of dicts with a 'number' key. Returns None when
    no meeting matches, including when meetings is empty.
    '''
    if meeting_number < 0:
        # default=None: max() raises ValueError on an empty sequence otherwise.
        return max(meetings, key=lambda x: x['number'], default=None)
    return next((meeting for meeting in meetings if meeting['number'] == meeting_number), None)
def find_document(documents, document_number):
    '''
    Return the first document whose 'document' entry contains document_number.

    Spaces in document_number are ignored and the comparison is
    case-insensitive. Returns None when nothing matches or when
    document_number is blank.
    '''
    wanted = document_number.replace(' ', '').strip().lower()
    if not wanted:
        # An empty string is a substring of everything and would wrongly
        # select the first document.
        return None
    for doc in documents:
        if wanted in doc['document'].lower():
            return doc
    return None
def find_project(projects, url_or_path):
    '''
    Search for gitlab project based on URL or path_with_namespace.

    A project matches when its 'url' equals url_or_path exactly, or when
    url_or_path is a case-insensitive substring of its
    'path_with_namespace'. Spaces in url_or_path are ignored. Returns the
    first match, or None when url_or_path is None or blank or nothing matches.
    '''
    if url_or_path is None:
        return None
    wanted = url_or_path.replace(' ', '').strip()
    if not wanted:
        # An empty string is a substring of every path and would wrongly
        # select the first project.
        return None
    wanted_lower = wanted.lower()
    for project in projects:
        if wanted == project['url']:
            return project
        if wanted_lower in project['path_with_namespace'].lower():
            return project
    return None
def find_issue(issues, document):
    '''
    Search issues for the ones that belong to a document.

    Only issues whose title contains document['document'] are considered.
    Returns a tuple (title_only_hit, metadata_hit, last_version):
    - title_only_hit: a matching issue without a metadata tag, or None
    - metadata_hit: a matching issue whose metadata tag carries the
      document's mdms_id, or None
    - last_version: the version from that metadata tag as int, 0 when no
      version was found
    When several issues qualify, the last one in issues wins.
    '''
    title_only_hit = None
    metadata_hit = None
    last_version = 0
    for issue in issues:
        if document['document'] not in issue.title:
            continue
        # Issues may have no description at all; treat that as empty text.
        meta = get_issue_metadata(issue.description or '')
        if meta is None:
            title_only_hit = issue
        elif int(meta['mdms_id']) == document['mdms_id']:
            metadata_hit = issue
            if len(meta['version']) > 0:
                last_version = int(meta['version'])
        else:
            print('WARNING. We found a GitLab issue with the document number in the title and with metadata tag in description. But the metadata tag has wrong document id in it.')
    return title_only_hit, metadata_hit, last_version
def get_issue_metadata(description):
    '''
    Find and parse the metadata from the description of the issue.

    The tag has the form '[meta]: # (mdms_id,document,title,version)' and is
    expected to be alone on its line. Returns a dict with the string values
    'mdms_id', 'document', 'title' and 'version', or None when the
    description is empty, has no tag, or the tag is malformed.
    '''
    pattern = '[meta]: # ('
    if not description:
        return None
    start = description.find(pattern)
    if start < 0:
        return None
    start += len(pattern)

    # Restrict the search to the tag's own line and take the LAST ')' on it,
    # so that parentheses inside the title do not cut the tag short.
    line_end = description.find('\n', start)
    if line_end < 0:
        line_end = len(description)
    end = description.rfind(')', start, line_end)
    if end < 0:
        return None

    # The title may contain commas: take the first two fields from the left
    # and the version from the right, whatever remains is the title.
    head = description[start:end].split(',', 2)
    if len(head) < 3:
        return None
    mdms_id, document, rest = head
    title, separator, version = rest.rpartition(',')
    if not separator:
        return None
    return {'mdms_id': mdms_id, 'document': document, 'title': title, 'version': version}
def create_issue_metadata(document, details):
    '''
    Build the metadata tag '[meta]: # (mdms_id,document,title,version)'.

    version is the highest 'version' among details['documents'], or an empty
    string when that list is empty.
    '''
    uploads = details['documents']
    newest = ''
    if len(uploads) > 0:
        newest = str(max(uploads, key=lambda x: x['version'])['version'])
    return '[meta]: # ({},{},{},{})'.format(document['mdms_id'], document['document'], document['title'], newest)
def create_issue_description_header(document, details):
    '''
    Build the generated head of an issue description.

    The result starts with OPENING_TAG, followed by the metadata tag and a
    one-row markdown table (container link, company, authors, document
    versions), and ends with CLOSING_TAG.
    '''
    author_cell = ''.join(' - {} <br>'.format(author['name']) for author in document['authors'])
    version_cell = ''.join(
        ' - [version {}]({}) <br>'.format(upload['version'], upload['path'])
        for upload in details['documents']
    )
    pieces = [
        OPENING_TAG,
        '\n',
        create_issue_metadata(document, details),
        '\n\n| Container | Company | Authors | Document |\n',
        '|-|-|-|-|\n',
        '| [{}]({}) | {} | '.format(document['document'], document['container'], details['organizations']),
        author_cell,
        ' | ',
        version_cell,
        ' |\n\n',
        CLOSING_TAG,
    ]
    return ''.join(pieces)
def create_issue_description(document, details):
    '''
    Build the full issue description: generated header, abstract section and
    the "automatically generated" footer.

    When details['abstract'] is empty or None, a checklist item asking for an
    abstract is inserted instead.
    '''
    header = create_issue_description_header(document, details)
    abstract = details['abstract']
    if abstract:
        body = abstract
    else:
        body = '* [ ] please add an abstract of your document\n'
    return header + '\n\n### Abstract\n' + body + '\n\n_automatically generated issue_'
def create_issue_title(document):
    '''
    Build the issue title: document number and document title, each stripped
    of surrounding whitespace and joined by a single space.
    '''
    number = document['document'].strip()
    title = document['title'].strip()
    return '{} {}'.format(number, title)
def get_updated_issue_description(current_decription, document, details):
    '''
    Regenerate the header of an existing issue description.

    Everything up to and including the first CLOSING_TAG is replaced by a
    freshly generated header; the text after the tag is kept unchanged.
    Returns None when current_decription contains no CLOSING_TAG.
    '''
    _, tag, user_text = current_decription.partition(CLOSING_TAG)
    if not tag:
        return None
    return create_issue_description_header(document, details) + user_text
\ No newline at end of file
# -*- coding: utf-8 -*-
'''
This is the interface to MPEG Document Management System (mdms).
It requests the data from MDMS and parses the output HTML
'''
from urllib.parse import urljoin, parse_qs, urlparse
import os
import requests
......
'''
Just a few examples on how to use gitlab module
'''
from automation import gitlab
# get all projects
projects = gitlab.get_projects()
print('project count:', len(projects))
for project in projects:
    print(project)

# get the open issues of the first project; get_projects() returns an empty
# list when authentication failed, so guard the index access
if projects:
    issues = gitlab.get_issues(projects[0]['id'])
    print('issues count:', len(issues))
\ No newline at end of file
'''
Just a few examples on how to use mdms module
'''
from automation import mdms
# Get all meetings
meetings = mdms.get_meetings()
print('Number of MPEG meetings:', len(meetings))
# Get latest meeting (this calls mdms.get_meetings() internally)
last_meeting = mdms.get_current_meeting()
print('\nLast MPEG#{} ({}) from {} to {}'.format(last_meeting['number'], last_meeting['name'], last_meeting['start_date'], last_meeting['end_date']))
# Get all input documents of a certain meeting
input_docs = mdms.get_input_documents(last_meeting['id'])
print('\nNumber of input contributions:', len(input_docs))
print('First entry:', input_docs[0])
# Get detailed information of a certain document
doc_details = mdms.get_document_details(input_docs[0]['mdms_id'])
print('\nDetails of the first document:', doc_details)
from automation import mdms
'''
This is an example how to get a list of authors who had input documents
with a specific substring in the title
'''
from automation import mdms, helpers
import sys
import os
# meetings with the smaller number will be ignored
LAST_MEETING_NUMBER = 42
# directory for the generated data files
DATA_PATH = './data'
# JSON file holding the collected contributors
CONRIBUTORS_PATH = os.path.join(DATA_PATH, 'ff_contributors.json')
# a document is relevant when its title contains one of these substrings
TITLE_WHITELIST= ['ISOBMFF', 'BMFF', '14496-12', 'file format', 'MP4', 'ISOBMF', 'ISO Base']
authors = {}
def document_is_relevant(title, filter_strings):
    '''
    Return True when title contains at least one entry of filter_strings.

    The comparison ignores case and surrounding whitespace on both sides.
    '''
    for needle in filter_strings:
        if needle.lower().strip() in title.lower().strip():
            return True
    return False
def get_contributors(last_meeting_number):
    '''
    Collect the relevant input documents of each author from MDMS.

    Meetings with a number smaller than last_meeting_number are ignored. A
    document is relevant when its title matches TITLE_WHITELIST.
    Returns a dict that maps an author name to the list of that author's
    relevant documents.

    NOTE(review): the scraped diff interleaved the removed script-level loop
    with this function; the body was reconstructed from the added lines -
    confirm against the repository.
    '''
    authors = {}
    for meeting in mdms.get_meetings():
        if meeting['number'] < last_meeting_number:
            continue
        print('process meegting', meeting['number'])
        for doc in mdms.get_input_documents(meeting['id']):
            if not document_is_relevant(doc['title'], TITLE_WHITELIST):
                continue
            for author in doc['authors']:
                authors.setdefault(author['name'], []).append(doc)
    return authors
# NOTE(review): indentation was reconstructed from the statement structure -
# confirm against the repository.

# Fetch the contributors from MDMS only when the JSON file does not exist yet,
# then always work on the data loaded from that file.
if not os.path.isfile(CONRIBUTORS_PATH):
    authors = get_contributors(LAST_MEETING_NUMBER)
    helpers.store_json_data(CONRIBUTORS_PATH, authors)
authors = helpers.load_json_data(CONRIBUTORS_PATH)
# author entries that are skipped entirely
blacklist = ['SC 29 Secretariat', 'WG 1', 'ITTF via SC 29 Secretariat', 'ITTF', 'ISO secretariat']
# maps an author name to the number of contributions
groups = {}
# name fragments (presumably surnames): two author names containing the same
# fragment are counted as one entry
dups = ['Singer', 'Hannuksela', 'Aksu', 'Sreedhar', 'Zia', 'Stockhammer', 'Yago', 'Feuvre', 'Deshpande', 'Curcio']
for name in authors:
    if name in blacklist or len(name) == 0:
        continue
    skip = False
    # Look for an existing entry sharing a fragment with this name; if one is
    # found, add this name's count to it instead of creating a new entry.
    for k in groups:
        for dup in dups:
            if dup in name and dup in k:
                groups[k] += len(authors[name])
                skip = True
                break
        if skip:
            break
    if not skip:
        groups[name] = len(authors[name])
# keep only entries with more than 10 contributions
groups = {key:val for key, val in groups.items() if val > 10}
# order by contribution count, highest first
groups = dict(sorted(groups.items(), reverse=True, key=lambda item: item[1]))
# two parallel lists: output['authors'][i] has output['contributions'][i] contributions
output = {
    'authors': [],
    'contributions': []
}
for entry in groups:
    output['authors'].append(entry)
    output['contributions'].append(groups[entry])
    print(entry, ' ', groups[entry])
helpers.store_json_data('./data/plot.json', output)
\ No newline at end of file