Commit 4337f20d authored by Dimitri Podborski
Browse files

PEP 8 cleanup and improve author name extraction

parent b2043309
......@@ -46,6 +46,11 @@ Temporary Items
# Local History for Visual Studio Code
.history/
####################################
# IntelliJ
####################################
.idea/*
####################################
# Python
####################################
......
......@@ -66,7 +66,8 @@ class MDMSParser:
return []
return meetings
def parse_author_entry(self, author_entry):
@staticmethod
def parse_author_entry(author_entry):
"""
Search entry string for an email, remove it from the name and clean up
Return a tuple('name', 'email')
......@@ -75,18 +76,19 @@ class MDMSParser:
if len(author_entry) == 0:
return None
email = None
match = re.search(r'[\w\.-]+@[\w\.-]+', author_entry)
match = re.search(r'[\w.-]+@[\w.-]+', author_entry)
if match: # email found
email = match.group(0)
author_entry = author_entry.replace(email, '') # remove email from the name
# remove everything that is inside () or []
author_entry = re.sub(r'[\(\[].*?[\)\]]', '', author_entry)
# remove all non ASCII characters
author_entry = re.sub(r'[^\x00-\x7F]+', '', author_entry)
author_entry = re.sub(r'[(\[].*?[)\]]', '', author_entry)
# remove everything that is neither an ASCII letter nor whitespace
author_entry = re.sub(r'[^a-zA-Z\s]+', '', author_entry)
author_entry = author_entry.strip()
return author_entry, email
def try_parsing_date(self, text):
@staticmethod
def try_parsing_date(text):
"""
Try parsing the timestamp, if not possible return None
"""
......@@ -288,7 +290,8 @@ class MDMSParser:
timestamp = self.try_parsing_date(entry[pos2 + 6:pos3])
return details
def check_table_header(self, template, header_row):
@staticmethod
def check_table_header(template, header_row):
"""
Check if header_row contains the same data as the template
"""
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment