Commit 72944f6f authored by Dimitri Podborski

pip 8

parent 7aaf00e2
# -*- coding: utf-8 -*-
'''
"""
This is the interface to MPEG GitLab API.
'''
"""
import os
import gitlab
@@ -10,6 +10,7 @@ from enum import Enum, unique
BASE_URL = 'http://mpegx.int-evry.fr/software'
TOKEN = os.environ.get('GITLAB_TOKEN')
@unique
class Label(Enum):
Accepted = 'Accepted'
@@ -27,6 +28,7 @@ class Label(Enum):
SeeDoCR = 'SeeDoCR'
Withdrawn = 'Withdrawn'
# private token authentication
GL = gitlab.Gitlab(BASE_URL, private_token=TOKEN)
try:
@@ -48,6 +50,7 @@ def _get_project(project_id):
return None
return project
# --------------------------------------------------------------------------------------------------
# Interfaces
# --------------------------------------------------------------------------------------------------
@@ -68,6 +71,7 @@ def get_projects():
})
return projects_stripped
def get_members(group_id):
if not GL:
print('Error: GitLab API authentication failed.')
@@ -79,7 +83,7 @@ def get_members(group_id):
real_group = GL.groups.get(subgroup.id, lazy=True)
members = real_group.members.all(all=True)
for member in members:
if not member.username in members_stripped:
if member.username not in members_stripped:
members_stripped[member.username] = {
'id': member.id,
'name': member.name,
@@ -87,6 +91,7 @@
}
return members_stripped
def get_issues(project_id):
project = _get_project(project_id)
if not project:
@@ -94,13 +99,17 @@ def get_issues(project_id):
issues = project.issues.list(state='opened', all=True)
return issues
def open_issue(project_id, title, description, labels=[]):
def open_issue(project_id, title, description, labels=None):
project = _get_project(project_id)
if not project:
return
if labels is None:
labels = []
issue = project.issues.create({'title': title, 'description': description, 'labels': labels})
issue.save()
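Why the signature changed from labels=[] to labels=None: default argument values are evaluated once at definition time, so a mutable default is shared across calls. A minimal sketch of the pitfall (function and label names are invented):

# Illustrative only: not from this codebase.
def append_label_bad(label, labels=[]):       # the default list is created once
    labels.append(label)
    return labels

def append_label_good(label, labels=None):    # use None as the sentinel instead
    if labels is None:
        labels = []
    labels.append(label)
    return labels

print(append_label_bad('Accepted'))    # ['Accepted']
print(append_label_bad('Rejected'))    # ['Accepted', 'Rejected']  <- state leaked between calls
print(append_label_good('Accepted'))   # ['Accepted']
print(append_label_good('Rejected'))   # ['Rejected']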
def close_issue(issue):
if isinstance(issue, gitlab.v4.objects.ProjectIssue):
issue.state_event = 'close'
......
# -*- coding: utf-8 -*-
'''
"""
Some helper functions
'''
"""
import json
import os
import re
@@ -12,28 +12,30 @@ from docx.enum.text import WD_ALIGN_PARAGRAPH # pylint: disable=E0611
OPENING_TAG = '[//]: # ( !!! ATTENTION !!! DO NOT MODIFY BEFORE AND AFTER THIS LINE)'
CLOSING_TAG = '[//]: # ( !!! ATTENTION !!! YOU CAN MODIFY AFTER THIS LINE)'
class DocumentFormatter:
def __init__(self, template_path):
self.__doc = Document(docx = template_path)
self.__doc = Document(docx=template_path)
def save(self, output_path):
self.__doc.save(output_path)
# https://github.com/python-openxml/python-docx/issues/74#issuecomment-261169410
def add_hyperlink(self, paragraph, url, text):
@staticmethod
def add_hyperlink(paragraph, url, text):
part = paragraph.part
r_id = part.relate_to(url, opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external = True)
r_id = part.relate_to(url, opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True)
hyperlink = oxml.shared.OxmlElement('w:hyperlink')
hyperlink.set(oxml.shared.qn('r:id'), r_id, )
run = oxml.shared.OxmlElement('w:r')
rPr = oxml.shared.OxmlElement('w:rPr')
r_pr = oxml.shared.OxmlElement('w:rPr')
c = oxml.shared.OxmlElement('w:color')
c.set(oxml.shared.qn('w:val'), '0000EE')
rPr.append(c)
r_pr.append(c)
u = oxml.shared.OxmlElement('w:u')
u.set(oxml.shared.qn('w:val'), 'single')
rPr.append(u)
run.append(rPr)
r_pr.append(u)
run.append(r_pr)
run.text = text
hyperlink.append(run)
paragraph._p.append(hyperlink)
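A short usage sketch for add_hyperlink now that it is a @staticmethod, assuming python-docx is installed; the document and output path are invented:

# Illustrative only.
from docx import Document

doc = Document()
p = doc.add_paragraph('Tracked at: ')
# Callable on the class directly, no DocumentFormatter instance needed.
DocumentFormatter.add_hyperlink(p, 'http://mpegx.int-evry.fr/software', 'MPEG GitLab')
doc.save('example.docx')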
@@ -44,7 +46,6 @@ class DocumentFormatter:
project_name = project['name']
h = self.__doc.add_heading('', 2)
self.add_hyperlink(h, project_url, project_name)
p = None
if len(project_description) > 0:
p = self.__doc.add_paragraph(project_description)
else:
@@ -63,7 +64,7 @@ class DocumentFormatter:
h.add_run(' ' + document['title'])
# Create a 4x2 table with all borders
table = self.__doc.add_table(rows = 4, cols = 2)
table = self.__doc.add_table(rows=4, cols=2)
table.style = 'Table Grid'
# Set the text of all the cells
@@ -85,7 +86,8 @@ class DocumentFormatter:
p = table.rows[2].cells[1].add_paragraph()
self.add_hyperlink(p, issue_title.web_url, issue_title.references['full'])
else:
self.add_hyperlink(table.rows[2].cells[1].paragraphs[0], issue_title.web_url, issue_title.references['full'])
self.add_hyperlink(table.rows[2].cells[1].paragraphs[0], issue_title.web_url,
issue_title.references['full'])
issues_added += 1
table.rows[3].cells[0].text = 'Disposition'
@@ -98,10 +100,11 @@ class DocumentFormatter:
p = self.__doc.add_paragraph('<minutes>')
p.paragraph_format.space_before = shared.Pt(8)
def is_document_late(meeting_start, v1_upload_timestamp):
'''
"""
meeting_start and v1_upload_timestamp shall be datetime objects
'''
"""
meeting_start = meeting_start.replace(hour=0, minute=0, second=0) # paranoia
deadline = meeting_start - timedelta(days=7) # End of Sunday
diff = deadline - v1_upload_timestamp
@@ -109,10 +112,11 @@ def is_document_late(meeting_start, v1_upload_timestamp):
return True
return False
def try_parsing_date(text):
'''
"""
Try to parse the timestamp; return None if parsing is not possible
'''
"""
for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d'):
try:
return datetime.strptime(text.strip(), fmt)
@@ -120,37 +124,41 @@ def try_parsing_date(text):
pass
return None
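A self-contained sketch of the intended fallback parsing, assuming the two formats are meant to be '%Y-%m-%d %H:%M:%S' and '%Y-%m-%d' (parse_timestamp is an invented name):

# Illustrative only.
from datetime import datetime

def parse_timestamp(text):
    # Return a datetime for the first matching format, or None.
    for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d'):
        try:
            return datetime.strptime(text.strip(), fmt)
        except ValueError:
            pass
    return None

print(parse_timestamp('2021-01-11 09:30:00'))  # 2021-01-11 09:30:00
print(parse_timestamp('2021-01-11'))           # 2021-01-11 00:00:00
print(parse_timestamp('11 Jan 2021'))          # None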
def load_json_data(json_path):
'''
"""
Load json file from json_path and return the data.
'''
"""
with open(json_path, 'r', encoding='utf-8') as f:
data = json.load(f)
return data
def store_json_data(json_path, data):
'''
"""
Store data as a json file to json_path. datetime objects are stored as strings.
'''
"""
dir_name = os.path.dirname(json_path)
if not os.path.exists(dir_name) and len(dir_name) > 0:
os.makedirs(dir_name)
with open(json_path, 'w', encoding='utf-8') as f:
json.dump(data, f, ensure_ascii=False, indent=2, default=str)
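A round-trip sketch for the two JSON helpers; note that datetime values are serialized as strings via default=str (the path is invented):

# Illustrative only.
from datetime import datetime

store_json_data('/tmp/meeting.json', {'start_date': datetime(2021, 1, 11)})
print(load_json_data('/tmp/meeting.json'))  # {'start_date': '2021-01-11 00:00:00'}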
def find_meeting(meetings, meeting_number):
'''
"""
Find and return a meeting using the meeting_number. If meeting_number < 0 return the latest meeting.
'''
"""
if len(meetings) == 0:
return None
if meeting_number < 0:
return max(meetings, key=lambda x:x['number'])
return max(meetings, key=lambda x: x['number'])
for meeting in meetings:
if meeting['number'] == meeting_number:
return meeting
return None
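A quick sketch of the lookup semantics, with invented meeting records:

# Illustrative only.
meetings = [{'number': 132, 'name': 'OnLine'}, {'number': 133, 'name': 'OnLine'}]
find_meeting(meetings, 133)  # the entry with number 133
find_meeting(meetings, -1)   # the latest entry (number 133)
find_meeting(meetings, 999)  # None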
def find_document(documents, document_number):
striped_doc_nr = document_number.replace(' ', '').strip().lower()
for doc in documents:
@@ -158,10 +166,11 @@ def find_document(documents, document_number):
return doc
return None
def find_project(projects, url_or_path, path_root = 'MPEG/'):
'''
def find_project(projects, url_or_path, path_root='MPEG/'):
"""
Search for gitlab project based on URL or path_with_namespace.
'''
"""
if url_or_path is None:
return None
striped_url_or_path = url_or_path.replace(' ', '').strip().strip('/')
@@ -173,6 +182,7 @@ def find_project(projects, url_or_path, path_root = 'MPEG/'):
return project
return None
def find_issue(issues, document):
title_only_hit = None
metadata_hit = None
@@ -180,7 +190,7 @@
for issue in issues:
if document['document'] in issue.title:
meta = get_issue_metadata(issue.description)
if meta == None:
if meta is None:
title_only_hit = issue
else:
if int(meta['mdms_id']) == document['mdms_id']:
@@ -188,40 +198,44 @@
if len(meta['version']) > 0:
last_version = int(meta['version'])
else:
print('WARNING. We found a GitLab issue with the document number in the title and with metadata tag in description. But the metadata tag has wrong document id in it.')
print('WARNING. We found a GitLab issue with the document number in the title and with metadata '
'tag in description. But the metadata tag has wrong document id in it.')
return title_only_hit, metadata_hit, last_version
def get_issue_metadata(description):
'''
"""
Find and parse the metadata from the description of the issue
'''
"""
pattern = '[meta]: # ('
pos1 = description.find(pattern)
if pos1 < 0:
return None
pos2 = description.find(')', pos1 + len(pattern))
meta_str = description[pos1+len(pattern):pos2]
meta_str = description[pos1 + len(pattern):pos2]
meta = meta_str.split(',')
if len(meta) != 4:
return None
return {'mdms_id': meta[0], 'document': meta[1], 'title': meta[2], 'version': meta[3]}
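A minimal round-trip sketch of the '[meta]: # (...)' marker written by create_issue_metadata and read back here; all values are invented:

# Illustrative only.
description = ('[meta]: # (12345,m56789,Some document title,2)\n\n'
               '| Container | Company | Authors | Document |\n')
print(get_issue_metadata(description))
# {'mdms_id': '12345', 'document': 'm56789', 'title': 'Some document title', 'version': '2'}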
def create_issue_metadata(document, details):
'''
"""
Create a metadata tag
'''
"""
version = ''
if len(details['documents']) > 0:
last_doc = max(details['documents'], key=lambda x:x['version'])
last_doc = max(details['documents'], key=lambda x: x['version'])
version = str(last_doc['version'])
title = document['title'].replace('(', '').replace(')', '').replace(',', '')
meta = '[meta]: # ({},{},{},{})'.format(document['mdms_id'], document['document'], title, version)
return meta
def create_issue_description_header(document, details):
'''
"""
Create issue description header, with metadata and the table
'''
"""
description = OPENING_TAG + '\n'
description += create_issue_metadata(document, details)
description += '\n\n| Container | Company | Authors | Document |\n'
@@ -236,31 +250,36 @@ def create_issue_description_header(document, details):
description += CLOSING_TAG
return description
def create_issue_description(document, details):
'''
"""
Create the description of the issue: metadata, table, abstract
'''
"""
description = create_issue_description_header(document, details)
description += '\n\n### Abstract\n'
if details['abstract']:
description += details['abstract']
else:
description += '* [ ] please **add your abstract here**.\n'
description += '* [ ] please also **add your abstract to MDMS** (this can be used when we create the output document).\n'
description += '* [ ] please also **add your abstract to MDMS** (this can be used when we create the output ' \
'document).\n '
description += '\n\n_automatically generated issue_'
return description
def create_issue_title(document):
return document['document'].strip() + ' ' + document['title'].strip()
def get_updated_issue_description(current_decription, document, details):
pos1 = current_decription.find(CLOSING_TAG)
if pos1 < 0:
return None
description = create_issue_description_header(document, details)
description += current_decription[pos1+len(CLOSING_TAG):]
description += current_decription[pos1 + len(CLOSING_TAG):]
return description
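The closing tag is what lets the generated header be refreshed without touching hand-written minutes below it; a minimal sketch of that split, with invented content:

# Illustrative only.
old_description = (OPENING_TAG + '\n'
                   '[meta]: # (12345,m56789,Old title,1)\n'
                   + CLOSING_TAG +
                   '\n### Notes\nHand-written minutes that must survive an update.')
pos = old_description.find(CLOSING_TAG)
print(old_description[pos + len(CLOSING_TAG):])  # only the hand-written part is kept verbatim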
def find_gitlab_users(gitlab_users, document):
usernames = []
regex = re.compile(r'[^a-zA-Z\s]')
@@ -268,11 +287,11 @@ def find_gitlab_users(gitlab_users, document):
for author in document['authors']:
author_name = author['name'].lower().strip()
author_name = regex.sub('', author_name) # remove non alphabetic chars
author_name = ' '.join( [w for w in author_name.split() if len(w)>1] ) # remove single letters
author_name = ' '.join([w for w in author_name.split() if len(w) > 1]) # remove single letters
for key in gitlab_users:
gl_name = gitlab_users[key]['name'].lower().strip()
gl_name = regex.sub('', gl_name) # remove non alphabetic chars
gl_name = ' '.join( [w for w in gl_name.split() if len(w)>1] ) # remove single letters
gl_name = ' '.join([w for w in gl_name.split() if len(w) > 1]) # remove single letters
if author_name == gl_name:
usernames.append(key)
except:
......
# -*- coding: utf-8 -*-
'''
"""
This is the interface to MPEG Document Management System (mdms).
It requests the data from MDMS and parses the output HTML
'''
"""
import os
import requests
@@ -21,10 +21,11 @@ DOCUMENT_URL = urljoin(BASE_URL, 'current_document.php')
MPEG_LOGIN = os.environ.get('MPEG_LOGIN')
MPEG_PWD = os.environ.get('MPEG_PWD')
class MDMSParser:
def parse_meetings(self, html):
meetings = []
soup = bs4.BeautifulSoup(html, features = 'lxml')
soup = bs4.BeautifulSoup(html, features='lxml')
tables = soup.find('body').find_all('table')
if len(tables) != 1:
print('Error: Only single table should be present in "All Meetings" frame. Did layout of MDMS change?')
@@ -32,7 +33,7 @@ class MDMSParser:
rows = tables[0].find_all('tr')
for n in range(len(rows)):
if n==0: # check header first
if n == 0: # check header first
header = ['number', 'name', 'start date', 'end date', 'last input document', 'last output document']
if not self.check_table_header(header, rows[n]):
print('Error: Wrong table header. Did layout of MDMS change?')
@@ -66,10 +67,10 @@ class MDMSParser:
return meetings
def parse_author_entry(self, author_entry):
'''
"""
Search entry string for an email, remove it from the name and clean up
Return a tuple('name', 'email')
'''
"""
author_entry = author_entry.strip()
if len(author_entry) == 0:
return None
@@ -83,12 +84,12 @@ class MDMSParser:
# remove all non ASCII characters
author_entry = re.sub(r'[^\x00-\x7F]+', '', author_entry)
author_entry = author_entry.strip()
return (author_entry, email)
return author_entry, email
def try_parsing_date(self, text):
'''
"""
Try to parse the timestamp; return None if parsing is not possible
'''
"""
for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d'):
try:
return datetime.strptime(text.strip(), fmt)
@@ -98,8 +99,8 @@ class MDMSParser:
def parse_input_docs(self, html):
docs = []
soup = bs4.BeautifulSoup(html, features = 'lxml')
for i in soup.select ('br'): # replace <br/> with a space, it makes checking headers easier
soup = bs4.BeautifulSoup(html, features='lxml')
for i in soup.select('br'): # replace <br/> with a space, it makes checking headers easier
i.replace_with(' ')
form = soup.find('body').find('form', id='documents')
@@ -113,8 +114,9 @@ class MDMSParser:
rows = table.find_all('tr', recursive=False)
for n in range(len(rows)):
if n==0: # check header first
header = ['number', 'created', 'uploaded', 'Group Working Group / SubGroup', 'title', 'source', 'download']
if n == 0: # check header first
header = ['number', 'created', 'uploaded', 'Group Working Group / SubGroup', 'title', 'source',
'download']
if not self.check_table_header(header, rows[n]):
print('Error: Wrong table header. Did layout of MDMS change?')
return []
@@ -137,9 +139,7 @@ class MDMSParser:
authors = []
for entry in cols[5].contents:
if isinstance(entry, bs4.Tag):
parsed_href = entry.text
email = None
name = None
try:
parsed_href = urlparse(entry['href'])
email = parsed_href.path
@@ -148,7 +148,7 @@ class MDMSParser:
if author_data:
name = author_data[0] # clean version of the name
# sometimes people type name and email wrong in MDMS and they are flipped
if not '@' in email and author_data[1]:
if '@' not in email and author_data[1]:
name = email
email = author_data[1]
except KeyError:
@@ -173,7 +173,7 @@ class MDMSParser:
# get latest document link (if available)
latest_url = None
if len(cols) == 7:
if not cols[6].find('a') == None:
if not cols[6].find('a') is None:
latest_url = urljoin(CURRENT_MEETING_URL, cols[6].find('a')['href'])
docs.append({
@@ -208,8 +208,8 @@ class MDMSParser:
'documents': []
}
soup = bs4.BeautifulSoup(html, features = 'lxml')
for i in soup.select ('br'): # replace <br/> with a space, it makes checking headers easier
soup = bs4.BeautifulSoup(html, features='lxml')
for i in soup.select('br'): # replace <br/> with a space, it makes checking headers easier
i.replace_with(' ')
# do some checks if format is ok
@@ -280,34 +280,37 @@ class MDMSParser:
timestamp = None
continue
pos1 = entry.find('(version')
pos2 = entry.find('- date', pos1+8)
pos3 = entry.find(')', pos2+6)
pos2 = entry.find('- date', pos1 + 8)
pos3 = entry.find(')', pos2 + 6)
if pos1 < 0 or pos2 < 0 or pos3 < 0:
continue
version = int(entry[pos1+8:pos2].strip())
timestamp = self.try_parsing_date(entry[pos2+6:pos3])
version = int(entry[pos1 + 8:pos2].strip())
timestamp = self.try_parsing_date(entry[pos2 + 6:pos3])
return details
def check_table_header(self, template, header_row):
'''
"""
Check if header_row contains the same data as the template
'''
"""
cols = header_row.find_all('td', recursive=False)
if not len(template) == len(cols):
print('Error: Table header should have {} columns but it has {}.'.format(len(template), len(cols)))
return False
for n in range(len(template)):
if not cols[n].text.strip().lower() == template[n].strip().lower():
print('Error: Table header entry mismatch: "{}" != "{}".'.format(cols[n].text.strip().lower(), template[n].strip().lower()))
print('Error: Table header entry mismatch: "{}" != "{}".'.format(cols[n].text.strip().lower(),
template[n].strip().lower()))
return False
return True
@unique
class SearchCategory(Enum):
ALL = ''
INPUT = 'm'
OUTPUT = 'w'
# search_id_group
@unique
class Standard(Enum):
@@ -338,6 +341,7 @@ class Standard(Enum):
EXPLORATIONS = 45
MAR_REFERENCE_MODEL = 46
# search_sub_group
@unique
class Subgroup(Enum):
@@ -357,17 +361,18 @@ class Subgroup(Enum):
JCTVC = 38
JCT3V = 39
# id_meeting=<meeting_id>
# type_order=0 # 0-inc, 1-decreasing order (input + output documents)
# sql_type=document_number | document_date_time | upload_document_date_time | document.id_sub_group | title | authors
def _get_query_string( meeting_id,
def _get_query_string(meeting_id,
category=SearchCategory.INPUT,
group=Standard.ALL,
subgroup=Subgroup.ALL):
return '?id_meeting={}' \
'&search_category={}' \
'&search_id_group={}' \
'&search_sub_group={}'.format(meeting_id, category.value, group.value, subgroup.value )
'&search_sub_group={}'.format(meeting_id, category.value, group.value, subgroup.value)
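A usage sketch for building the input-documents URL from this query string, reusing urljoin and CURRENT_MEETING_URL from this module; the meeting id is invented:

# Illustrative only.
query = _get_query_string(132)  # defaults: input documents, all standards, all subgroups
# e.g. '?id_meeting=132&search_category=m&search_id_group=...&search_sub_group=...'
url = urljoin(CURRENT_MEETING_URL, query)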
# --------------------------------------------------------------------------------------------------
@@ -375,11 +380,11 @@ def _get_query_string( meeting_id,
# --------------------------------------------------------------------------------------------------
def get_meetings():
'''
"""
Get data for all meetings. Returns a list with one entry per meeting:
[{ 'number', 'id', 'name', 'start_date', 'end_date', 'last_input', 'last_output' }, ...]
'''
"""
response = requests.get(MEETINGS_URL, auth=(MPEG_LOGIN, MPEG_PWD))
if not response.status_code == 200:
print('HTTP response {} != 200'.format(response.status_code))
@@ -388,23 +393,26 @@ def get_meetings():
parser = MDMSParser()
return parser.parse_meetings(response.text)
def get_current_meeting():
'''
"""
Returns data of the latest meeting.
{ 'number', 'id', 'name', 'start_date', 'end_date', 'last_input', 'last_output' }
'''
"""
meetings = get_meetings()
if len(meetings) == 0:
return None
return max(meetings, key=lambda x:x['number'])
return max(meetings, key=lambda x: x['number'])
def get_input_documents(meeting_id, standard=Standard.ALL, subgroup=Subgroup.ALL):
'''
"""
Returns data of all input documents of a certain meeting.
[{'mdms_id', 'document', 'created', 'last_version_uploaded', 'sub_group_text', 'title', 'authors', 'latest_version_url'}, ...]
'''
[{'mdms_id', 'document', 'created', 'last_version_uploaded', 'sub_group_text', 'title', 'authors',
'latest_version_url'}, ...]
"""
query = _get_query_string(meeting_id, SearchCategory.INPUT, standard, subgroup)
url = urljoin(CURRENT_MEETING_URL, query)
response = requests.get(url, auth=(MPEG_LOGIN, MPEG_PWD))
@@ -415,11 +423,13 @@ def get_input_documents(meeting_id, standard=Standard.ALL, subgroup=Subgroup.ALL
parser = MDMSParser()
return parser.parse_input_docs(response.text)
def get_document_details(document_id):
'''
Get more details about a document.
{'submitted_by': {'name', 'email'}, 'title', 'authors_string', 'organizations', 'abstract', 'related_docs', 'ahg', 'sub_group', 'group', 'standard', 'activity', 'documents': [{'path', 'version', 'timestamp'}, ... ]}
'''
"""
Get more details about a document. {'submitted_by': {'name', 'email'}, 'title', 'authors_string', 'organizations',
'abstract', 'related_docs', 'ahg', 'sub_group', 'group', 'standard', 'activity', 'documents': [{'path',
'version', 'timestamp'}, ... ]}
"""
query = '?id={}'.format(document_id)
url = urljoin(DOCUMENT_URL, query)
response = requests.post(url, auth=(MPEG_LOGIN, MPEG_PWD))
@@ -433,15 +443,15 @@ def get_document_details(document_id):