Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Dimitri Podborski
AutomationTools
Commits
70dcbd7a
Commit
70dcbd7a
authored
May 20, 2021
by
Dimitri Podborski
😂
Browse files
implement find_documents API
parent
094c7503
Changes
2
Show whitespace changes
Inline
Side-by-side
automation/mdms.py
View file @
70dcbd7a
...
...
@@ -99,7 +99,7 @@ class MDMSParser:
pass
return
None
def
parse_
input
_docs
(
self
,
html
):
def
parse_
search
_docs
(
self
,
html
):
docs
=
[]
soup
=
bs4
.
BeautifulSoup
(
html
,
features
=
'lxml'
)
for
i
in
soup
.
select
(
'br'
):
# replace <br/> with a space, it makes checking headers easier
...
...
@@ -109,16 +109,15 @@ class MDMSParser:
if
not
form
:
print
(
'Error: No form with id="documents" found. Did MDMS layout change?'
)
return
[]
table
=
form
.
find
(
'table'
)
if
not
table
:
print
(
'Error: No table found in form. Did MDMS layout change?'
)
return
[]
table_main
=
form
.
find
(
'table'
)
if
not
table_main
:
print
(
'Error: No main table element found. Did MDMS layout change?'
)
return
None
rows
=
table_main
.
find_all
(
'tr'
,
recursive
=
False
)
rows
=
table
.
find_all
(
'tr'
,
recursive
=
False
)
for
n
in
range
(
len
(
rows
)):
if
n
==
0
:
# check header first
header
=
[
'number'
,
'created'
,
'uploaded'
,
'Group Working Group / SubGroup'
,
'title'
,
'source'
,
'download'
]
header
=
[
'number'
,
'meeting'
,
'created'
,
'Uploaded'
,
'Group SubGroup'
,
'title'
,
'Author(s)'
,
''
]
if
not
self
.
check_table_header
(
header
,
rows
[
n
]):
print
(
'Error: Wrong table header. Did layout of MDMS change?'
)
return
[]
...
...
@@ -134,12 +133,37 @@ class MDMSParser:
container_url
=
urljoin
(
DOCUMENT_URL
,
'?id={}'
.
format
(
mdms_id
))
# get timestamp of the last uploaded version
last_version_uploaded
=
self
.
try_parsing_date
(
cols
[
2
].
text
)
creat
ed_timestamp
=
self
.
try_parsing_date
(
cols
[
1
].
text
)
created_timestamp
=
self
.
try_parsing_date
(
cols
[
2
].
text
)
upload
ed_timestamp
=
self
.
try_parsing_date
(
cols
[
3
].
text
)
# get authors
authors
=
self
.
_get_authors
(
cols
[
6
])
# get latest document link (if available)
latest_url
=
None
if
len
(
cols
)
==
8
:
if
not
cols
[
7
].
find
(
'a'
)
is
None
:
latest_url
=
urljoin
(
CURRENT_MEETING_URL
,
cols
[
7
].
find
(
'a'
)[
'href'
])
docs
.
append
({
'mdms_id'
:
mdms_id
,
'document'
:
cols
[
0
].
text
,
'meeting'
:
cols
[
1
].
text
,
'created'
:
created_timestamp
,
'last_version_uploaded'
:
uploaded_timestamp
,
'sub_group_text'
:
cols
[
4
].
text
,
'title'
:
cols
[
5
].
text
.
strip
(),
'authors'
:
authors
,
'latest_version_url'
:
latest_url
,
'container'
:
container_url
})
except
:
# TODO: catch properly
print
(
'Error: Could not parse search documents data. Did MDMS layout change?'
)
return
[]
return
docs
def
_get_authors
(
self
,
col
):
authors
=
[]
for
entry
in
col
s
[
5
]
.
contents
:
for
entry
in
col
.
contents
:
if
isinstance
(
entry
,
bs4
.
Tag
):
email
=
None
try
:
...
...
@@ -171,6 +195,48 @@ class MDMSParser:
'name'
:
author_data
[
0
],
'email'
:
author_data
[
1
]
})
return
authors
def
parse_input_docs
(
self
,
html
):
docs
=
[]
soup
=
bs4
.
BeautifulSoup
(
html
,
features
=
'lxml'
)
for
i
in
soup
.
select
(
'br'
):
# replace <br/> with a space, it makes checking headers easier
i
.
replace_with
(
' '
)
form
=
soup
.
find
(
'body'
).
find
(
'form'
,
id
=
'documents'
)
if
not
form
:
print
(
'Error: No form with id="documents" found. Did MDMS layout change?'
)
return
[]
table
=
form
.
find
(
'table'
)
if
not
table
:
print
(
'Error: No table found in form. Did MDMS layout change?'
)
return
[]
rows
=
table
.
find_all
(
'tr'
,
recursive
=
False
)
for
n
in
range
(
len
(
rows
)):
if
n
==
0
:
# check header first
header
=
[
'number'
,
'created'
,
'uploaded'
,
'Group Working Group / SubGroup'
,
'title'
,
'source'
,
'download'
]
if
not
self
.
check_table_header
(
header
,
rows
[
n
]):
print
(
'Error: Wrong table header. Did layout of MDMS change?'
)
return
[]
continue
cols
=
rows
[
n
].
find_all
(
'td'
,
recursive
=
False
)
try
:
if
len
(
cols
[
0
].
text
)
==
0
:
continue
# get document ID on MDMS
parsed_href
=
urlparse
(
cols
[
0
].
a
[
'href'
])
mdms_id
=
int
(
parse_qs
(
parsed_href
.
query
)[
'id'
][
0
])
container_url
=
urljoin
(
DOCUMENT_URL
,
'?id={}'
.
format
(
mdms_id
))
# get timestamp of the last uploaded version
last_version_uploaded
=
self
.
try_parsing_date
(
cols
[
2
].
text
)
created_timestamp
=
self
.
try_parsing_date
(
cols
[
1
].
text
)
# get authors
authors
=
self
.
_get_authors
(
cols
[
5
])
# get latest document link (if available)
latest_url
=
None
...
...
@@ -455,6 +521,22 @@ def find_documents(title='',
subgroup
=
Subgroup
.
ALL
):
"""
Find documents using the search URL.
TODO: Fire a POST request to SEARCH_URL and parse the result
"""
raise
NotImplemented
query
=
'?search_title={}'
\
'&search_number={}'
\
'&search_category={}'
\
'&search_author={}'
\
'&search_id_group={}'
\
'&search_sub_group={}'
\
'&id_meeting=0'
\
'&submit=Search'
\
'&meeting=0'
.
format
(
title
,
number
,
category
.
value
,
author
,
group
.
value
,
subgroup
.
value
)
url
=
urljoin
(
SEARCH_URL
,
query
)
response
=
requests
.
post
(
url
,
auth
=
(
MPEG_LOGIN
,
MPEG_PWD
))
if
not
response
.
status_code
==
200
:
print
(
'HTTP response {} != 200'
.
format
(
response
.
status_code
))
print
(
'
\t
{}'
.
format
(
response
.
text
.
replace
(
'
\n
'
,
'
\n\t
'
)))
return
None
parser
=
MDMSParser
()
return
parser
.
parse_search_docs
(
response
.
text
)
systems.py
View file @
70dcbd7a
...
...
@@ -210,7 +210,6 @@ def open_issues(table_entries, test, gitlab_members, meeting_start):
if
'y'
in
user_input
:
new_description
=
helpers
.
get_updated_issue_description
(
issue_with_meta
.
description
,
document
,
document_details
)
current_labels
=
issue_with_meta
.
labels
if
'DocAvailable'
not
in
issue_with_meta
.
labels
:
issue_with_meta
.
labels
.
append
(
'DocAvailable'
)
if
not
test
:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment