#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import inspect
import json
import os
import openreview
import re
import datetime
import csv
from pylatexenc.latexencode import utf8tolatex, unicode_to_latex, UnicodeToLatexConversionRule, UnicodeToLatexEncoder, RULE_REGEX
import unicodedata
from Crypto.Hash import HMAC, SHA256
from multiprocessing import Pool, cpu_count
from tqdm import tqdm
import tld
import urllib.parse as urlparse
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import random
import string
from deprecated.sphinx import deprecated
import jwt
# --- URL Constants ---
# Production deployment: API v1, API v2 and the web site.
PROD_API_V1 = 'https://api.openreview.net'
PROD_API_V2 = 'https://api2.openreview.net'
PROD_SITE = 'https://openreview.net'
# Development deployment: API v1, API v2 and the web site.
DEV_API_V1 = 'https://api.dev.openreview.net'
DEV_API_V2 = 'https://api2.dev.openreview.net'
DEV_SITE = 'https://dev.openreview.net'
# Local deployment: each URL is read from the named environment variable and
# falls back to the localhost default when the variable is not set.
LOCAL_API_V1 = os.environ.get('OPENREVIEW_API_V1_URL', 'http://localhost:3000')
LOCAL_API_V2 = os.environ.get('OPENREVIEW_API_V2_URL', 'http://localhost:3001')
LOCAL_SITE = os.environ.get('OPENREVIEW_WEB_URL', 'http://localhost:3030')
# Remote-only lists (exclude localhost) used by client guards
V1_REMOTE_URLS = [PROD_API_V1, DEV_API_V1]
V2_REMOTE_URLS = [PROD_API_V2, DEV_API_V2]
def _identify_environment(baseurl):
    """
    Classify a base URL as belonging to the 'dev', 'prod' or 'local' environment.

    :param baseurl: URL to classify
    :type baseurl: str

    :return: 'dev' when baseurl contains one of the development API URLs, 'prod' when it contains one of the production API URLs, 'local' otherwise
    :rtype: str
    """
    # Order matters: the development URLs are tested before the production ones.
    known_environments = (
        ('dev', (DEV_API_V1, DEV_API_V2)),
        ('prod', (PROD_API_V1, PROD_API_V2)),
    )
    for environment, api_urls in known_environments:
        for api_url in api_urls:
            # Substring match, so a base URL with a trailing path still matches.
            if api_url in baseurl:
                return environment
    return 'local'
[docs]
def decision_to_venue(venue_id, decision_option, accept_options=None):
    """
    Builds the venue string of a submission from its decision.

    Non-accept decisions produce ``Submitted to <venue_id>``. Accept decisions produce the venue id followed by
    whatever is left of the decision once the word "Accept", the parentheses and every other non-word character
    (spaces included) have been removed; when nothing is left the venue id is returned unchanged.

    :param venue_id: venue's short name (i.e., ICLR 2022)
    :type venue_id: string
    :param decision_option: paper decision (i.e., Accept, Reject)
    :type decision_option: string
    :param accept_options: accept decisions (i.e., [ Accept (Best Paper), Invite to Archive ])
    :type accept_options: list

    :return: venue string for the submission
    :rtype: string
    """
    if not is_accept_decision(decision_option, accept_options):
        return f'Submitted to {venue_id}'

    # Removing 'Accept' is a no-op when the word is absent from the decision.
    qualifier = decision_option.replace('Accept', '')
    qualifier = re.sub(r'[()\W]+', '', qualifier)
    if not qualifier:
        return venue_id
    return venue_id + ' ' + qualifier.strip()
[docs]
def is_accept_decision(decision, accept_options=None):
    """
    Tells whether a decision counts as an accept decision.

    When accept_options is a non-empty collection the decision must be one of its entries; otherwise the decision
    is an accept decision when it contains the word "Accept".

    :param decision: paper decision (i.e., Accept, Reject)
    :type decision: string
    :param accept_options: accept decisions (i.e., [ Accept (Best Paper), Invite to Archive ])
    :type accept_options: list

    :return: True for an accept decision, False otherwise
    :rtype: bool
    """
    if accept_options:
        return decision in accept_options
    return 'Accept' in decision
[docs]
def run_once(f):
    """
    Decorator to run a function only once and return its output for any subsequent call to the function without running
    it again.

    The arguments of later calls are ignored: the value computed by the first successful call is always returned.
    If the wrapped function raises, nothing is cached and the next call runs it again.

    :param f: function to wrap
    :type f: function

    :return: wrapper exposing a ``has_run`` flag and, once the function has run, the cached ``to_return`` value
    :rtype: function
    """
    # Function-scope import so the block does not depend on a new module-level import.
    import functools

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        if not wrapper.has_run:
            # Store the result BEFORE flipping the flag: if f raises, has_run
            # stays False and later calls retry instead of failing with an
            # AttributeError on the missing to_return attribute.
            wrapper.to_return = f(*args, **kwargs)
            wrapper.has_run = True
        return wrapper.to_return

    wrapper.has_run = False
    return wrapper
def format_params(params):
    """
    Recursively converts the booleans found in params into their JSON text ('true' / 'false').

    Dictionaries and lists are rebuilt with every value formatted the same way; any other value is returned untouched.

    :param params: value to format
    :type params: dict, list, bool or any other value

    :return: a copy of the structure where each boolean is replaced by its JSON representation
    """
    if isinstance(params, dict):
        return {key: format_params(value) for key, value in params.items()}
    if isinstance(params, list):
        return [format_params(item) for item in params]
    if isinstance(params, bool):
        return json.dumps(params)
    return params
[docs]
def concurrent_requests(request_func, params, desc='Gathering Responses', max_workers=None):
    """
    Returns a list of results given for each request_func param execution. It shows a progress bar to know the progress of the task.

    Results are returned in the same order as params. If any execution raises, the exception is propagated to the
    caller once the executor has shut down, and the progress bar is closed either way.

    :param request_func: a function to execute for each value of the list.
    :type request_func: function
    :param params: a list of values to be executed by request_func. It must support ``len()``.
    :type params: list
    :param desc: description to show in the progress bar.
    :type desc: str
    :param max_workers: number of workers to use in the ThreadPoolExecutor, default value is min(16, cpu_count() * 5).
    :type max_workers: int

    :return: A list of results given for each func value execution
    :rtype: list
    """
    if max_workers is None:
        max_workers = min(16, (cpu_count() or 1) * 5)

    results = []
    # The context manager closes the progress bar even when a request raises.
    with tqdm(total=len(params), desc=desc) as gathering_responses:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(request_func, param) for param in params]
            for future in futures:
                results.append(future.result())
                # Tick only once the result is actually available.
                gathering_responses.update(1)
    return results
[docs]
def get_profile(client, value, with_publications=False):
    """
    Get a single profile by e-mail or id, if available.

    :param client: User that will retrieve the profile
    :type client: Client
    :param value: e-mail or id of the profile
    :type value: str
    :param with_publications: if True, the notes whose ``authorids`` match the profile id are loaded through an API v1 client and stored in ``profile.content['publications']``
    :type with_publications: bool, optional

    :return: Profile that matches the value passed as parameter, or None when the lookup fails with a "Profile Not Found" error
    :rtype: Profile
    """
    profile = None
    try:
        profile = client.get_profile(value)
        if with_publications:
            # Only the API v1 publications are loaded; the v2 base URL is unused here.
            baseurl_v1, _baseurl_v2 = get_base_urls(client)
            v1_client = openreview.Client(baseurl=baseurl_v1, token=client.token)
            publications = list(iterget_notes(v1_client, content={'authorids': profile.id}))
            profile.content['publications'] = publications
    except openreview.OpenReviewException as error:
        # "not found" is reported by returning what was retrieved so far
        # (None when the lookup itself failed); anything else is re-raised.
        if 'Profile Not Found' not in error.args[0]:
            raise error
    return profile
[docs]
def get_profiles(client, ids_or_emails, with_publications=False, with_relations=False, with_preferred_emails=None, as_dict=False):
    '''
    Helper function that repeatedly queries for profiles, given IDs and emails.
    Useful for getting more Profiles than the server will return by default (1000)

    Values containing ``~`` are treated as profile ids, every other value as an e-mail. E-mails without a matching
    profile get a placeholder Profile whose id is the e-mail itself.

    :param client: Client used to search the profiles
    :type client: Client
    :param ids_or_emails: profile ids and/or e-mails to look up
    :type ids_or_emails: list[str]
    :param with_publications: if True, the notes whose ``authorids`` match each profile id are loaded from both APIs into ``profile.content['publications']``
    :type with_publications: bool, optional
    :param with_relations: if True, each relation that resolves to a profile (by username or e-mail) gets a ``profile_id`` key
    :type with_relations: bool, optional
    :param with_preferred_emails: invitation id to get the edges where the preferred emails are stored
    :type with_preferred_emails: str
    :param as_dict: if True, return a dictionary keyed by each requested id/e-mail (ids without a profile map to None)
    :type as_dict: bool, optional

    :return: list of Profiles, or a dictionary of Profiles when as_dict is True
    :rtype: list[Profile] or dict
    '''
    ids = []
    emails = []
    for member in ids_or_emails:
        (ids if '~' in member else emails).append(member)

    profile_by_id = {}
    profile_by_id_or_email = {}

    def process_profile(profile, email=None):
        # Index the profile by its id, by each of its usernames and,
        # when given, by the e-mail it was found with.
        profile_by_id[profile.id] = profile
        for name in profile.content.get("names", []):
            username = name.get("username")
            if username:
                profile_by_id_or_email[username] = profile
        if email:
            profile_by_id_or_email[email] = profile

    batch_size = 1000

    ## Get profiles by id and add them to the profiles list
    for start in range(0, len(ids), batch_size):
        for found in client.search_profiles(ids=ids[start:start + batch_size]):
            process_profile(found)

    ## Get profiles by email and add them to the profiles list
    for start in range(0, len(emails), batch_size):
        found_by_email = client.search_profiles(confirmedEmails=emails[start:start + batch_size])
        for found_email, found in found_by_email.items():
            process_profile(found, found_email)

    ## Emails without a profile get a placeholder profile identified by the email
    for email in emails:
        if email in profile_by_id_or_email:
            continue
        placeholder = openreview.Profile(
            id=email,
            content={
                'emails': [email],
                'preferredEmail': email,
                'emailsConfirmed': [email],
                'names': []
            })
        profile_by_id[placeholder.id] = placeholder
        profile_by_id_or_email[email] = placeholder

    profiles = list(profile_by_id.values())

    ## Get publications for all the profiles
    if with_publications:
        baseurl_v1, baseurl_v2 = get_base_urls(client)
        client_v1 = openreview.Client(baseurl=baseurl_v1, token=client.token)
        client_v2 = openreview.api.OpenReviewClient(baseurl=baseurl_v2, token=client.token)

        def get_publications(profile):
            # Query both APIs in parallel for one profile and concatenate v1 + v2.
            with ThreadPoolExecutor(max_workers=2) as executor:
                pending_v1 = executor.submit(client_v1.get_all_notes, content={'authorids': profile.id})
                pending_v2 = executor.submit(client_v2.get_all_notes, content={'authorids': profile.id})
                return pending_v1.result() + pending_v2.result()

        publications_all = concurrent_requests(get_publications, profiles, desc='Loading publications from both APIs')
        for idx, publications in enumerate(publications_all):
            profiles[idx].content['publications'] = publications

    if with_relations:
        relation_profile_ids = set()
        for profile in profiles:
            relations = profile.content.get('relations', [])
            relation_profile_ids.update(r.get('username') for r in relations if r.get('username'))
            relation_profile_ids.update(r.get('email') for r in relations if r.get('email'))

        relation_profiles_by_id = get_profiles(client, list(relation_profile_ids), as_dict=True)

        for profile in profiles:
            for relation in profile.content.get('relations', []):
                # The username takes precedence over the e-mail.
                relation_profile = relation_profiles_by_id.get(relation.get('username'))
                if not relation_profile:
                    relation_profile = relation_profiles_by_id.get(relation.get('email'))
                if relation_profile:
                    relation['profile_id'] = relation_profile.id

    if with_preferred_emails is not None:
        grouped_edges = client.get_grouped_edges(invitation=with_preferred_emails, groupby='head', select='tail')
        preferred_email_by_id = {g['id']['head']: g['values'][0]['tail'] for g in grouped_edges}
        for profile in profiles:
            preferred_email = preferred_email_by_id.get(profile.id)
            if preferred_email:
                profile.content['preferredEmail'] = preferred_email

    if as_dict:
        # Ids first, then e-mails, each key mapped to its profile (or None).
        return {key: profile_by_id_or_email.get(key) for key in ids + emails}

    return profiles
[docs]
def get_group(client, id):
    """
    Get a single Group by id if available

    :param client: User that will retrieve the group
    :type client: Client
    :param id: id of the group
    :type id: str

    :return: Group that matches the passed id, or None when the API reports that the group was not found
    :rtype: Group

    :raises openreview.OpenReviewException: for any error other than "not found"
    """
    try:
        return client.get_group(id = id)
    except openreview.OpenReviewException as e:
        # throw an error if it is something other than "not found"
        error = e.args[0] if e.args else None
        # Only a dict payload can describe "not found"; any other payload
        # shape (list, string, ...) is re-raised untouched instead of failing
        # with an unrelated AttributeError.
        if isinstance(error, dict):
            message = error.get('message') or ''
            if error.get('name') == 'NotFoundError' or message.startswith('Group Not Found'):
                return None
        raise
[docs]
def get_note(client, id):
    """
    Get a single Note by id if available

    :param client: User that will retrieve the note
    :type client: Client
    :param id: id of the note
    :type id: str

    :return: Note that matches the passed id, or None when the API reports that the note was not found
    :rtype: Note

    :raises openreview.OpenReviewException: for any error other than "not found"
    """
    try:
        return client.get_note(id = id)
    except openreview.OpenReviewException as e:
        # throw an error if it is something other than "not found"
        error = e.args[0] if e.args else None
        # Only a dict payload can describe "not found"; any other payload
        # shape (list, string, ...) is re-raised untouched instead of failing
        # with an unrelated AttributeError.
        if isinstance(error, dict):
            message = error.get('message') or ''
            if error.get('name') == 'NotFoundError' or message.startswith('Note Not Found'):
                return None
        raise
[docs]
def get_invitation(client, id):
    """
    Get a single Invitation by id if available.

    Any OpenReviewException raised by the client is printed and swallowed.

    :param client: User that will retrieve the invitation
    :type client: Client
    :param id: id of the invitation
    :type id: str

    :return: Invitation that matches the passed id or None if it does not exist or it is expired
    :rtype: Invitation
    """
    try:
        return client.get_invitation(id = id)
    except openreview.OpenReviewException as error:
        print('Can not retrieve invitation', error)
        return None
[docs]
def create_profile(client, email, fullname, super_user='openreview.net'):
    """
    Given an email and a full name, creates a new profile.

    A tilde username is requested for the full name, the username and email groups are posted through the
    ``<super_user>/-/Username`` and ``<super_user>/-/Email`` invitations, and finally the profile itself is posted.

    :param client: User that will create the Profile
    :type client: Client
    :param email: Preferred e-mail in the Profile
    :type email: str
    :param fullname: Full name of the user
    :type fullname: str
    :param super_user: Super user of the system
    :type super_user: str

    :return: The created Profile
    :rtype: Profile

    :raises openreview.OpenReviewException: when a profile already exists for the email
    """
    if get_profile(client, email):
        raise openreview.OpenReviewException('There is already a profile with this email address: {}'.format(email))

    tilde_id = client.get_tildeusername(fullname)['username']
    creator_id = client.profile.id

    # The two groups point at each other: the username group has the email as
    # member and the email group has the username as member.
    tilde_group = openreview.api.Group(
        id=tilde_id,
        signatures=[creator_id],
        signatories=[tilde_id],
        readers=[tilde_id],
        writers=[creator_id],
        members=[email])
    email_group = openreview.api.Group(
        id=email,
        signatures=[creator_id],
        signatories=[email],
        readers=[email],
        writers=[creator_id],
        members=[tilde_id])

    profile_content = {
        'emails': [email],
        'preferredEmail': email,
        'names': [
            {
                'fullname': fullname,
                'username': tilde_id
            }
        ],
    }

    for invitation_name, new_group in (('Username', tilde_group), ('Email', email_group)):
        client.post_group_edit(
            f'{super_user}/-/{invitation_name}',
            signatures=[super_user],
            readers=[tilde_id],
            writers=[super_user],
            group=new_group
        )

    new_profile = openreview.Profile(id=tilde_id, content=profile_content, signatures=[tilde_id])
    return client.post_profile(new_profile)
def create_authorid_profiles(client, note):
    """
    Creates a profile for every author of a note that is identified by an e-mail address.

    The note content is expected to hold ``authors`` and ``authorids`` entries, each with a ``value`` list. Nothing
    is created when either entry is missing or when both lists differ in length. Failures reported by
    create_profile (for instance because the profile already exists) are printed and skipped.

    :param client: Client used to create the profiles
    :type client: Client
    :param note: Note whose authors are processed
    :type note: Note

    :return: the profiles that were created
    :rtype: list[Profile]
    """
    created_profiles = []
    content = note.content
    if 'authors' not in content or 'authorids' not in content:
        return created_profiles

    # Asterisks are removed from the author names.
    author_names = [name.replace('*', '') for name in content['authors']['value']]
    author_emails = list(content['authorids']['value'])

    if len(author_names) != len(author_emails):
        print('{}: length mismatch. authors ({}), authorids ({})'.format(
            note.id,
            len(author_names),
            len(author_emails)
        ))
        return created_profiles

    # iterate through authorids and authors at the same time
    for raw_id, raw_name in zip(author_emails, author_names):
        author_id = raw_id.strip()
        author_name = raw_name.strip()
        if '@' not in author_id:
            # Not an e-mail address: nothing to create.
            continue
        try:
            profile = create_profile(client=client, email=author_id, fullname=author_name)
            created_profiles.append(profile)
            print('{}: profile created with id {}'.format(note.id, profile.id))
        except openreview.OpenReviewException as e:
            print('Error while creating profile for note id {note_id}, author {author_id}, '.format(note_id=note.id, author_id=author_id), e)

    return created_profiles
[docs]
def get_preferred_name(profile, last_name_only=False):
    """
    Returns the preferred name stored in a profile.

    :param profile: Profile from which the preferred name will be retrieved
    :type profile: Profile
    :param last_name_only: if True, only the last space-separated token of the full name is returned
    :type last_name_only: bool, optional

    :return: User's preferred name, if available, or the first listed name if not available.
    :rtype: str
    """
    names = profile.content['names']
    for candidate in names:
        if candidate.get('preferred', False):
            selected = candidate
            break
    else:
        # No name is flagged as preferred: fall back to the first one listed.
        selected = names[0]

    fullname = selected['fullname']
    return fullname.split(' ')[-1] if last_name_only else fullname
[docs]
def generate_bibtex(note, venue_fullname, year, url_forum=None, paper_status='under review', anonymous=True, names_reversed=False, baseurl='https://openreview.net', editor=None):
    """
    Generates a bibtex entry for a given Note.

    The title and the authors are read from ``note.content``, where each of them may be stored either directly or
    under a ``value`` key. The citation key is the ASCII form of
    ``<first author last name><year><first word of the title>``.

    :param note: Note from which the bibtex is generated
    :type note: Note
    :param venue_fullname: Full name of the venue to be placed in the book title field
    :type venue_fullname: str
    :param year: Note year
    :type year: str
    :param url_forum: Forum id, if none is provided, it is obtained from the note parameter: note.forum
    :type url_forum: str, optional
    :param paper_status: Used to indicate the status of a paper: ["accepted", "rejected" or "under review"]
    :type paper_status: string, optional
    :param anonymous: Used to indicate whether or not the paper's authors should be revealed
    :type anonymous: bool, optional
    :param names_reversed: If true, it indicates that the last name is written before the first name
    :type names_reversed: bool, optional
    :param baseurl: Base url where the bibtex is from. Default https://openreview.net
    :type baseurl: str, optional
    :param editor: Editor to be placed in the editor field; only used for accepted papers
    :type editor: str, optional

    :return: Note bibtex, or None when paper_status is not one of the three supported values
    :rtype: str
    """
    def _field(name, value, trailing_comma=True):
        # Renders ``name={value}``. Plain concatenation is deliberate: a
        # non-string value raises TypeError instead of being formatted.
        return name + '={' + value + '}' + (',' if trailing_comma else '')

    raw_title = note.content['title']
    note_title = raw_title if isinstance(raw_title, str) else raw_title['value']
    first_word = re.sub('[^a-zA-Z]', '', note_title.split(' ')[0].lower())
    forum = url_forum if url_forum else note.forum

    if anonymous:
        first_author_last_name = 'anonymous'
        authors = 'Anonymous'
    else:
        raw_authors = note.content['authors']
        note_author_list = raw_authors if isinstance(raw_authors, list) else raw_authors['value']
        first_author_last_name = note_author_list[0].split(' ')[-1].lower()
        if names_reversed:
            # last, first
            reversed_names = []
            for name in note_author_list:
                rest, _, last = name.rpartition(' ')
                reversed_names.append(last + ', ' + rest)
            authors = ' and '.join(reversed_names)
        else:
            authors = ' and '.join(note_author_list)

    # Runs of two or more capital letters are wrapped in braces (presumably so
    # BibTeX keeps their capitalisation); everything else follows the default
    # conversion rules.
    title_encoder = UnicodeToLatexEncoder(
        conversion_rules=[
            UnicodeToLatexConversionRule(
                rule_type=RULE_REGEX,
                rule=[
                    (re.compile(r'[A-Z]{2,}'), r'{\g<0>}')
                ]),
            'defaults'
        ]
    )
    bibtex_title = title_encoder.unicode_to_latex(note_title)

    raw_key = first_author_last_name + year + first_word + ','
    bibtex_key = unicodedata.normalize('NFKD', raw_key).encode("ascii", "ignore").decode("ascii")

    if paper_status == 'under review':
        entry = [
            '@inproceedings{',
            bibtex_key,
            _field('title', bibtex_title),
            _field('author', utf8tolatex(authors)),
            _field('booktitle', 'Submitted to ' + utf8tolatex(venue_fullname)),
            _field('year', year),
            _field('url', baseurl + '/forum?id=' + forum),
            'note={under review}',
            '}'
        ]
    elif paper_status == 'accepted':
        entry = [
            '@inproceedings{',
            bibtex_key,
            _field('title', bibtex_title),
            _field('author', utf8tolatex(authors)),
            _field('booktitle', utf8tolatex(venue_fullname))
        ]
        if editor:
            entry.append(_field('editor', utf8tolatex(editor)))
        entry.append(_field('year', year))
        entry.append(_field('url', baseurl + '/forum?id=' + forum, trailing_comma=False))
        entry.append('}')
    elif paper_status == 'rejected':
        entry = [
            '@misc{',
            bibtex_key,
            _field('title', bibtex_title),
            _field('author', utf8tolatex(authors)),
            _field('year', year),
            _field('url', baseurl + '/forum?id=' + forum, trailing_comma=False),
            '}'
        ]
    else:
        # Unsupported status: no entry is produced.
        return None

    return '\n'.join(entry)
@run_once
def load_duplicate_domains():
    """
    Loads the content of the ``duplicate_domains.json`` file located next to this module.

    The function is decorated with run_once, so the file is parsed by the first call and later calls reuse that
    result.

    :return: the parsed content of the JSON file
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # JSON is read as UTF-8 explicitly instead of relying on the platform
    # default encoding; the with block takes care of closing the file.
    with open(os.path.join(dir_path, 'duplicate_domains.json'), encoding='utf-8') as f:
        return json.load(f)
[docs]
def subdomains(domain):
    """
    Given an email address, returns a list with the domains and subdomains.

    Every suffix of the dot-separated domain is considered; suffixes that are top level domains are dropped and the
    remaining ones are replaced by their entry in the duplicate-domains table when they have one. The result is
    de-duplicated and sorted alphabetically.

    :param domain: e-mail address or domain of the e-mail address
    :type domain: str

    :return: List of domains and subdomains
    :rtype: list[str]

    Example (assuming none of these domains is remapped by the duplicate-domains table):

    >>> subdomains('johnsmith@iesl.cs.umass.edu')
    ['cs.umass.edu', 'iesl.cs.umass.edu', 'umass.edu']
    """
    duplicate_domains: dict = load_duplicate_domains()

    # Keep only what follows the last '@' so that the local part of an e-mail
    # address never ends up inside a "domain"; plain domains are unaffected.
    full_domain = domain.rsplit('@', 1)[-1]

    domain_components = [c for c in full_domain.split('.') if c and not c.isspace()]
    domains = ['.'.join(domain_components[index:]) for index in range(len(domain_components))]

    valid_domains = set()
    for d in domains:
        if not tld.is_tld(d):
            valid_domains.add(duplicate_domains.get(d, d))

    return sorted(valid_domains)
[docs]
def get_paperhash(first_author, title):
    """
    Returns the paperhash of a paper, given the title and first author.

    The hash is ``<last token of the author name>|<title>`` in lower case, with the punctuation removed from both
    parts, line breaks removed from the title and each run of whitespace in the title replaced by an underscore.

    :param first_author: First author that appears on the paper
    :type first_author: str
    :param title: Title of the paper
    :type title: str

    :return: paperhash, see example
    :rtype: str

    Example:

    >>> get_paperhash('David Soergel', 'Open Scholarship and Peer Review: a Time for Experimentation')
    'soergel|open_scholarship_and_peer_review_a_time_for_experimentation'
    """
    strip_punctuation = r'[^A-zÀ-ÿ\d\s]'

    hashed_title = re.sub(strip_punctuation, '', title.strip())
    # Line breaks are dropped (not turned into underscores) before the
    # remaining whitespace runs are collapsed.
    hashed_title = re.sub('\r|\n', '', hashed_title)
    hashed_title = re.sub(r'\s+', '_', hashed_title)

    last_name = re.sub(strip_punctuation, '', first_author).split(' ').pop()

    return (last_name + '|' + hashed_title).lower()
[docs]
def replace_members_with_ids(client, group):
    """
    Given a Group object, iterates through the Group's members and, for any member represented by an email address, attempts to find a profile associated with that email address. If a profile is found, replaces the email with the profile id.

    The updated group is saved with ``client.post_group`` when the client provides it, otherwise with
    ``client.post_group_edit`` (using the ``<group.domain>/-/Edit`` invitation) followed by a fresh ``get_group``.

    :param client: Client used to get the Profiles and to post the new Group
    :type client: Client
    :param group: Group for which the profiles will be updated
    :type group: Group

    :return: Group with the emails replaced by Profile ids
    :rtype: Group

    :raises openreview.OpenReviewException: when a member starting with ``~`` has no profile
    """
    member_profiles = get_profiles(client, group.members, as_dict=True)

    updated_members = []
    without_profile_ids = []
    for member in group.members:
        profile = member_profiles.get(member)
        if profile is not None:
            updated_members.append(profile.id)
            continue
        # No profile: a tilde id is an error, anything else is kept as is.
        if member.startswith('~'):
            without_profile_ids.append(member)
        else:
            updated_members.append(member)

    if without_profile_ids:
        raise openreview.OpenReviewException(f"Profile Not Found for {without_profile_ids}")

    group.members = updated_members

    if getattr(client, 'post_group', None):
        return client.post_group(group)

    if not getattr(client, 'post_group_edit', None):
        return None

    domain = group.domain
    client.post_group_edit(
        invitation = domain + '/-/Edit',
        readers = [domain],
        writers = [domain],
        signatures = [domain],
        group = openreview.api.Group(
            id = group.id,
            members = list(set(group.members))
        ),
        flush_members_cache=False
    )
    return client.get_group(group.id)
[docs]
def concurrent_get(client, get_function, **params):
    """
    Retrieves every result of a paginated getter, requesting the pages beyond the first one concurrently.

    When ``limit`` fits in a single request (``limit <= client.limit``) the getter is simply called once. Otherwise
    the total count is obtained with a ``with_count=True, limit=1`` request, the first page is fetched, and the
    remaining pages are requested in a thread pool while a progress bar is displayed.

    :param client: Client used to make requests; its ``limit`` attribute is used as the page size
    :param get_function: Function that performs the request. It must accept ``offset``, ``limit`` and ``with_count`` and return a list, or a ``(list, count)`` pair when ``with_count`` is True
    :type get_function: function
    :param params: Parameters to pass to the get_function
    :type params: dict

    :return: List of results
    :rtype: list
    """
    max_workers = min(16, (cpu_count() or 1) * 5)

    if (params.get('limit') or float('inf')) <= client.limit:
        return get_function(**params)

    # Ask only for the total count (a single result, no offset).
    get_count_params = params.copy()
    if get_count_params.get('offset') is not None:
        get_count_params.pop('offset')
    get_count_params['with_count'] = True
    get_count_params['limit'] = 1
    _, count = get_function(**get_count_params)

    params['with_count'] = False

    limit = params.get('limit')
    if (limit or client.limit) > client.limit:
        # A limit above the page size cannot be sent as is; it is re-applied
        # below as the size of the last page.
        params.pop('limit')

    docs = get_function(**params)

    offset = params.get('offset') or 0

    if (count - offset) <= client.limit:
        return docs

    start = offset + client.limit
    end = count if limit is None else min(offset + limit, count)
    offset_list = list(range(start, end, client.limit))

    # The context manager closes the progress bar even when a request raises.
    with tqdm(total=len(offset_list), desc='Gathering Responses') as gathering_responses:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = []
            # Dedicated loop names: the total count and the base offset are
            # no longer overwritten while the pages are scheduled.
            for page_index, page_offset in enumerate(offset_list):
                params['offset'] = page_offset
                if (page_index + 1) == len(offset_list) and (end - page_offset) > 0:
                    # The last page only asks for what is left before `end`.
                    params['limit'] = end - page_offset
                futures.append(executor.submit(get_function, **params))

            for future in futures:
                docs.extend(future.result())
                gathering_responses.update(1)

    return docs
[docs]
class iterget:
    """
    Iterator built on top of a getter method that returns a list, paginating with ``offset`` and ``limit``.

    The first batch is requested when the iterator is created; the following batches are requested as soon as the
    last element of the current batch is consumed. Iteration stops when a batch comes back empty. Below all the
    iterators that can be created from a getter method:

    :meth:`openreview.Client.get_tags` --> :func:`tools.iterget_tags`

    :meth:`openreview.Client.get_notes` --> :func:`tools.iterget_notes`

    :meth:`openreview.Client.get_references` --> :func:`tools.iterget_references`

    :meth:`openreview.Client.get_invitations` --> :func:`tools.iterget_invitations`

    :meth:`openreview.Client.get_groups` --> :func:`tools.iterget_groups`

    :param get_function: Any of the aforementioned methods
    :type get_function: function
    :param params: Dictionary containing parameters for the corresponding method. Refer to the passed method documentation for details. ``limit`` is used as the batch size and defaults to 1000
    :type params: dict
    """

    def __init__(self, get_function, **params):
        self.offset = 0
        self.last_batch = False
        self.batch_finished = False
        self.obj_index = 0

        self.params = params
        self.params['offset'] = self.offset
        self.params['limit'] = params.get('limit') or 1000

        self.get_function = get_function
        self.current_batch = self.get_function(**self.params)

    def update_batch(self):
        """Moves the offset one batch forward and loads that batch (an empty list when nothing is returned)."""
        self.offset += self.params['limit']
        self.params['offset'] = self.offset
        self.current_batch = self.get_function(**self.params) or []

    def __iter__(self):
        return self

    def __next__(self):
        if len(self.current_batch) == 0:
            raise StopIteration

        next_obj = self.current_batch[self.obj_index]
        is_last_of_batch = (self.obj_index + 1) == len(self.current_batch)
        if is_last_of_batch:
            # Prefetch the next batch before handing out the last element.
            self.update_batch()
            self.obj_index = 0
        else:
            self.obj_index += 1
        return next_obj

    next = __next__
[docs]
class efficient_iterget:
    """
    Iterator built on top of a getter method that returns a list, paginating with the ``after`` parameter instead of
    an offset: each new batch is requested after the ``id`` of the last object of the current batch.

    The first request is made with ``with_count=True`` and must return a ``(batch, total)`` pair; the following
    requests must return only the batch. A progress bar is displayed when the total is larger than the batch size.
    The objects returned by the getter need an ``id`` attribute. It is used, for instance, with
    :meth:`openreview.Client.get_notes` by :func:`tools.iterget_notes`.

    :param get_function: getter method supporting the ``with_count``, ``sort``, ``limit`` and ``after`` parameters
    :type get_function: function
    :param desc: description to show in the progress bar
    :type desc: str
    :param params: Dictionary containing parameters for the corresponding method. Refer to the passed method documentation for details. ``sort`` defaults to ``'id'`` and ``limit`` (the batch size) to 1000
    :type params: dict
    """

    def __init__(self, get_function, desc='Gathering Responses', **params):
        self.obj_index = 0

        self.params = params
        self.params.update({
            'with_count': True,
            'sort': params.get('sort') or 'id',
            'limit': params.get('limit') or 1000
        })

        self.get_function = get_function
        self.current_batch, total = self.get_function(**self.params)

        # No progress bar when everything fits in the first batch.
        self.gathering_responses = tqdm(total=total, desc=desc) if total > self.params['limit'] else None

    def update_batch(self):
        """Loads the batch that follows the last object of the current one (an empty list when nothing is returned)."""
        after = self.current_batch[-1].id
        self.params['after'] = after
        self.params['with_count'] = False
        next_batch = self.get_function(**self.params)
        self.current_batch = next_batch if next_batch else []

    def __iter__(self):
        return self

    def __next__(self):
        if len(self.current_batch) == 0:
            if self.gathering_responses is not None:
                self.gathering_responses.close()
            raise StopIteration

        next_obj = self.current_batch[self.obj_index]

        # Count every object that is handed out, including the last one of
        # each batch, so that the bar can actually reach its total.
        if self.gathering_responses is not None:
            self.gathering_responses.update(1)

        if (self.obj_index + 1) == len(self.current_batch):
            self.update_batch()
            self.obj_index = 0
        else:
            self.obj_index += 1
        return next_obj

    next = __next__
[docs]
def iterget_messages(client, to = None, subject = None, status = None):
    """
    Returns an iterator over Messages ignoring API limit.

    The three filters are always forwarded to ``client.get_messages``, even when they are None.

    :param client: Client used to get the Messages
    :type client: Client
    :param to: value forwarded as the ``to`` filter
    :type to: str, optional
    :param subject: value forwarded as the ``subject`` filter
    :type subject: str, optional
    :param status: value forwarded as the ``status`` filter
    :type status: str, optional

    Example:

    >>> iterget_messages(client, to='melisa@mail.com')

    :return: Iterator over Messages filtered by the provided parameters
    :rtype: iterget
    """
    return iterget(client.get_messages, to=to, subject=subject, status=status)
[docs]
def iterget_edges (client,
    invitation = None,
    head = None,
    tail = None,
    label = None,
    limit = None,
    trash = None):
    """Return an iterator over Edges that walks through the API pages transparently.

    Only the filters that are not None are forwarded to ``client.get_edges``; ``trash`` is forwarded only when it
    is True. ``limit`` is handed to the iterator as one of its parameters.

    :param client: Client used to get the Edges.
    :type client: Client
    :param invitation: An Invitation ID. If provided, returns Edges whose invitation field matches.
    :type invitation: str, optional
    :param head: A head entity ID. If provided, returns Edges whose head field matches.
    :type head: str, optional
    :param tail: A tail entity ID. If provided, returns Edges whose tail field matches.
    :type tail: str, optional
    :param label: If provided, returns Edges whose label field matches.
    :type label: str, optional
    :param limit: Value forwarded as the ``limit`` parameter of the iterator.
    :type limit: int, optional
    :param trash: If True, includes Edges that have been deleted.
    :type trash: bool, optional

    :return: Iterator over Edge objects matching the provided filters.
    :rtype: iterget
    """
    optional_filters = (
        ('invitation', invitation),
        ('head', head),
        ('tail', tail),
        ('label', label),
        ('limit', limit),
    )
    params = {key: value for key, value in optional_filters if value is not None}
    if trash == True:
        params['trash'] = True

    return iterget(client.get_edges, **params)
[docs]
def iterget_grouped_edges(
    client,
    invitation=None,
    groupby='head',
    select='id,tail,label,weight',
    logger=None
):
    '''
    Helper function for retrieving and parsing all edges in bulk.

    Generator that yields one list of Edge objects per group returned by ``client.get_grouped_edges``. Each Edge is
    built from empty readers/writers/signatures and the invitation, completed with the selected values and with
    the group id.

    :param client: Client used to get the grouped edges
    :type client: Client
    :param invitation: Invitation ID of the edges
    :type invitation: str, optional
    :param groupby: field used to group the edges
    :type groupby: str, optional
    :param select: comma separated fields to retrieve for each edge
    :type select: str, optional
    :param logger: not used by this function
    '''
    def build_edge(group_id, group_values):
        edge_params = dict(readers=[], writers=[], signatures=[], invitation=invitation)
        edge_params.update(group_values)
        edge_params.update(group_id)
        return openreview.Edge(**edge_params)

    ## Backend has pagination temporally disabled, it returns all the groups now so we need to do one iteration.
    grouped_edges = client.get_grouped_edges(invitation=invitation, groupby=groupby, select=select)
    for group in grouped_edges:
        yield [build_edge(group['id'], values) for values in group['values']]
[docs]
@deprecated(version='1.52.6', reason="Use client.get_all_notes() instead")
def iterget_notes(client,
    id = None,
    paperhash = None,
    forum = None,
    invitation = None,
    replyto = None,
    tauthor = None,
    signature = None,
    writer = None,
    trash = None,
    number = None,
    mintcdate = None,
    content = None,
    details = None,
    sort = None):
    """
    Returns an iterator over Notes filtered by the provided parameters ignoring API limit.

    Only the filters that are not None are forwarded to ``client.get_notes``; ``trash`` is forwarded only when it
    is True and ``sort`` is always forwarded.

    :param client: Client used to get the Notes
    :type client: Client
    :param id: a Note ID. If provided, returns Notes whose ID matches the given ID.
    :type id: str, optional
    :param paperhash: a "paperhash" for a note. If provided, returns Notes whose paperhash matches this argument. (A paperhash is a human-interpretable string built from the Note's title and list of authors to uniquely identify the Note)
    :type paperhash: str, optional
    :param forum: a Note ID. If provided, returns Notes whose forum matches the given ID.
    :type forum: str, optional
    :param invitation: an Invitation ID. If provided, returns Notes whose "invitation" field is this Invitation ID.
    :type invitation: str, optional
    :param replyto: a Note ID. If provided, returns Notes whose replyto field matches the given ID.
    :type replyto: str, optional
    :param tauthor: a Group ID. If provided, returns Notes whose tauthor field ("true author") matches the given ID, or is a transitive member of the Group represented by the given ID.
    :type tauthor: str, optional
    :param signature: a Group ID. If provided, returns Notes whose signatures field contains the given Group ID.
    :type signature: str, optional
    :param writer: a Group ID. If provided, returns Notes whose writers field contains the given Group ID.
    :type writer: str, optional
    :param trash: If True, includes Notes that have been deleted (i.e. the ddate field is less than the current date)
    :type trash: bool, optional
    :param number: If present, includes Notes whose number field equals the given integer.
    :type number: int, optional
    :param mintcdate: Represents an Epoch time timestamp in milliseconds. If provided, returns Notes whose "true creation date" (tcdate) is at least equal to the value of mintcdate.
    :type mintcdate: int, optional
    :param content: If present, includes Notes whose each key is present in the content field and it is equals the given value.
    :type content: dict, optional
    :param details: forwarded unchanged as the ``details`` parameter of ``client.get_notes``
    :type details: str, optional
    :param sort: forwarded as the ``sort`` parameter; the iterator sorts by ``id`` when it is None
    :type sort: str, optional

    :return: Iterator over Notes filtered by the provided parameters
    :rtype: efficient_iterget
    """
    def only_set(candidates):
        # Keep the filters the caller actually provided.
        return {key: value for key, value in candidates if value is not None}

    params = only_set((
        ('id', id),
        ('paperhash', paperhash),
        ('forum', forum),
        ('invitation', invitation),
        ('replyto', replyto),
        ('tauthor', tauthor),
        ('signature', signature),
        ('writer', writer),
    ))
    if trash == True:
        params['trash'] = True
    params.update(only_set((
        ('number', number),
        ('mintcdate', mintcdate),
        ('content', content),
        ('details', details),
    )))
    params['sort'] = sort

    return efficient_iterget(client.get_notes, desc='Getting Notes', **params)
[docs]
@deprecated(version='1.52.6', reason="Use client.get_all_references() instead")
def iterget_references(client, referent = None, invitation = None, mintcdate = None):
    """
    Builds an iterator over the references that match the given filters, ignoring the API limit.

    Only the filters whose value is not None are forwarded to ``client.get_references``.

    :param client: Client used to get the references
    :type client: Client
    :param referent: a Note ID. If provided, returns references whose "referent" value is this Note ID.
    :type referent: str, optional
    :param invitation: an Invitation ID. If provided, returns references whose "invitation" field is this Invitation ID.
    :type invitation: str, optional
    :param mintcdate: Epoch timestamp in milliseconds. If provided, returns references whose "true creation date" (tcdate) is at least equal to this value.
    :type mintcdate: int, optional

    :return: Iterator over references filtered by the provided parameters
    :rtype: iterget
    """
    candidate_filters = (
        ('referent', referent),
        ('invitation', invitation),
        ('mintcdate', mintcdate),
    )
    # Drop every filter the caller left unset so it is not sent to the API at all.
    query = {name: value for name, value in candidate_filters if value is not None}
    return iterget(client.get_references, **query)
[docs]
@deprecated(version='1.52.6', reason="Use client.get_all_invitations() instead")
def iterget_invitations(client, id=None, ids=None, invitee=None, regex=None, tags=None, minduedate=None, duedate=None, pastdue=None, replytoNote=None, replyForum=None, signature=None, note=None, replyto=None, details=None, expired=None, super=None, sort=None):
    """
    Builds an iterator over the invitations that match the given filters, ignoring the API limit.

    Only the filters whose value is not None are forwarded to ``client.get_invitations``.

    :param client: Client used to get the Invitations
    :type client: Client
    :param id: an Invitation ID. If provided, returns invitations whose "id" value is this Invitation ID.
    :type id: str, optional
    :param ids: Comma separated Invitation IDs. If provided, returns invitations whose "id" value is any of the passed Invitation IDs.
    :type ids: str, optional
    :param invitee: The invitees field of an Invitation contains the Group IDs being invited. If provided, returns invitations whose "invitee" field contains the given string.
    :type invitee: str, optional
    :param regex: a regular expression string to match Invitation IDs. If provided, returns invitations whose "id" value matches the given regex.
    :type regex: str, optional
    :param tags: If provided, returns Invitations whose Tags field contains the given Tag IDs.
    :type tags: list[str], optional
    :param minduedate: Epoch timestamp in milliseconds. If provided, returns Invitations whose duedate is at least equal to this value.
    :type minduedate: int, optional
    :param duedate: Epoch timestamp in milliseconds. If provided, returns Invitations whose duedate field matches the given duedate.
    :type duedate: int, optional
    :param pastdue: If provided, forwarded to the API as the ``pastdue`` filter.
    :type pastdue: bool, optional
    :param replytoNote: a Note ID. If provided, returns Invitations whose replytoNote field contains the given Note ID.
    :type replytoNote: str, optional
    :param replyForum: a forum ID. If provided, returns Invitations whose forum field contains the given forum ID.
    :type replyForum: str, optional
    :param signature: a Group ID. If provided, returns Invitations whose signature field contains the given Group ID.
    :type signature: str, optional
    :param note: a Note ID. If provided, returns Invitations whose note field contains the given Note ID.
    :type note: str, optional
    :param replyto: a Note ID. If provided, returns Invitations whose replyto field matches the given Note ID.
    :type replyto: str, optional
    :param details: If provided, forwarded to the API as the ``details`` option.
    :type details: str, optional
    :param expired: also get expired invitations; by default only 'active' invitations are returned.
    :type expired: bool, optional
    :param super: If provided, forwarded to the API as the ``super`` filter.
    :type super: str, optional
    :param sort: If provided, forwarded to the API as the ``sort`` option.
    :type sort: str, optional

    :return: Iterator over Invitations filtered by the provided parameters
    :rtype: iterget
    """
    # Listed in the same order in which the query parameters have always been assembled.
    candidate_filters = (
        ('id', id),
        ('ids', ids),
        ('invitee', invitee),
        ('regex', regex),
        ('tags', tags),
        ('minduedate', minduedate),
        ('duedate', duedate),
        ('pastdue', pastdue),
        ('details', details),
        ('replytoNote', replytoNote),
        ('replyForum', replyForum),
        ('signature', signature),
        ('note', note),
        ('replyto', replyto),
        ('super', super),
        ('expired', expired),
        ('sort', sort),
    )
    query = {name: value for name, value in candidate_filters if value is not None}
    return efficient_iterget(client.get_invitations, desc='Getting Invitations', **query)
[docs]
@deprecated(version='1.52.6', reason="Use client.get_all_groups() instead")
def iterget_groups(client, id = None, regex = None, member = None, host = None, signatory = None, web = None):
    """
    Builds an iterator over the groups that match the given filters, ignoring the API limit.

    Only the filters whose value is not None are forwarded to ``client.get_groups``.

    :param client: Client used to get the Groups
    :type client: Client
    :param id: a Group ID. If provided, returns groups whose "id" value is this Group ID.
    :type id: str, optional
    :param regex: a regular expression string to match Group IDs. If provided, returns groups whose "id" value matches the given regex.
    :type regex: str, optional
    :param member: The members field of a Group contains the Group IDs that are members of it. If provided, returns groups whose "members" field contains the given string.
    :type member: str, optional
    :param host: If provided, forwarded to the API as the ``host`` filter.
    :type host: str, optional
    :param signatory: a Group ID. If provided, returns Groups whose signatory field contains the given Group ID.
    :type signatory: str, optional
    :param web: Groups that contain a web field value
    :type web: bool, optional

    :return: Iterator over Groups filtered by the provided parameters
    :rtype: iterget
    """
    candidate_filters = (
        ('id', id),
        ('regex', regex),
        ('member', member),
        ('host', host),
        ('signatory', signatory),
        ('web', web),
    )
    query = {name: value for name, value in candidate_filters if value is not None}
    return efficient_iterget(client.get_groups, desc='Getting Groups', **query)
[docs]
def timestamp_GMT(year, month, day, hour=0, minute=0, second=0):
    """
    Given year, month, day, and (optionally) hour, minute, second in GMT time zone:
    returns the number of milliseconds between this date and Epoch Time (Jan 1, 1970).

    The result does not depend on the time zone of the machine running the code.

    :param year: year >= 1970
    :type year: int
    :param month: value from 1 to 12
    :type month: int
    :param day: value from 1 to 28, 29, 30, or 31; depending on the month value.
    :type day: int
    :param hour: value from 0 to 23
    :type hour: int, optional
    :param minute: value from 0 to 59
    :type minute: int, optional
    :param second: value from 0 to 59
    :type second: int, optional

    :return: Number of milliseconds between the passed date and Epoch Time (Jan 1, 1970)
    :rtype: int

    >>> timestamp_GMT(1990, 12, 20, hour=12, minute=30, second=24)
    661696224000
    """
    # The datetime must carry an explicit UTC tzinfo: ``timestamp()`` interprets a
    # naive datetime in the local time zone, which would shift the result by the
    # local UTC offset on any machine that is not configured for UTC.
    moment = datetime.datetime(year, month, day, hour, minute, second, tzinfo=datetime.timezone.utc)
    return int(moment.timestamp() * 1000)
[docs]
def datetime_millis(dt):
    """
    Converts a datetime to milliseconds since Epoch Time.

    :param dt: The date to convert. Any value that is not a ``datetime.datetime`` is returned unchanged.
    :type dt: datetime

    :return: The time from Jan 1, 1970 to the passed date in milliseconds, truncated to an integer
    :rtype: int
    """
    if not isinstance(dt, datetime.datetime):
        # Non-datetime values (e.g. None or an already converted number) pass through untouched.
        return dt
    seconds_since_epoch = dt.timestamp()
    return int(seconds_since_epoch * 1000)
[docs]
def recruit_reviewer(client, user, first,
                     hash_seed,
                     recruit_reviewers_id,
                     recruit_message,
                     recruit_message_subj,
                     reviewers_invited_id,
                     contact_info='info@openreview.net',
                     verbose=True,
                     replyTo=None,
                     invitation=None,
                     signature=None):
    """
    Recruit a reviewer. Sends an email to the reviewer with a link to accept or
    reject the recruitment invitation.

    The user is added to the ``reviewers_invited_id`` group before the email is sent.
    The recruitment links always point to https://openreview.net, regardless of the
    client's base URL.

    :param client: Client used to send the e-mail
    :type client: Client
    :param user: User to whom the e-mail will be sent
    :type user: str
    :param first: First name of the person to whom e-mail will be sent. If falsy, ``{{fullname}}`` is left untouched in the message.
    :type first: str
    :param hash_seed: secret seed used as the HMAC key for the recruitment link.
    :type hash_seed: str
    :param recruit_reviewers_id: Invitation ID used in the recruitment URL.
    :type recruit_reviewers_id: str
    :param recruit_message: message template. The placeholders ``{{fullname}}``, ``{{accept_url}}``, ``{{decline_url}}``, ``{{invitation_url}}`` and ``{{contact_info}}`` are replaced literally.
    :type recruit_message: str
    :param recruit_message_subj: subject line for the recruitment email
    :type recruit_message_subj: str
    :param reviewers_invited_id: group ID for the "Reviewers Invited" group, often used to keep track of which reviewers have already been emailed.
    :type reviewers_invited_id: str
    :param contact_info: The information used to contact support for questions
    :type contact_info: str
    :param verbose: Prints the response of :meth:`openreview.Client.post_message` and the body of the message sent
    :type verbose: bool, optional
    :param replyTo: Reply-to address passed to ``client.post_message``.
    :type replyTo: str, optional
    :param invitation: Message invitation ID. When given, it is passed to ``client.post_message`` together with ``signature``.
    :type invitation: str, optional
    :param signature: Signature used when posting the message. Only used when ``invitation`` is given.
    :type signature: str, optional
    """
    # HMAC.new() only accepts bytestrings, so the seed and the user are UTF-8 encoded.
    hashkey = HMAC.new(hash_seed.encode('utf-8'), msg=user.encode('utf-8'), digestmod=SHA256).hexdigest()

    # Always point to the live site so we never send invitations with localhost links.
    baseurl = 'https://openreview.net'

    # build the URL to send in the message
    url = '{baseurl}/invitation?id={recruitment_inv}&user={user}&key={hashkey}'.format(
        baseurl=baseurl,
        recruitment_inv=recruit_reviewers_id,
        user=urlparse.quote(user),
        hashkey=hashkey
    )

    # Fill the template by literal replacement. No str.format() pass is applied, so
    # any other brace in the message is sent verbatim instead of raising.
    personalized_message = recruit_message.replace("{{fullname}}", first) if first else recruit_message
    personalized_message = personalized_message.replace("{{accept_url}}", url + "&response=Yes")
    personalized_message = personalized_message.replace("{{decline_url}}", url + "&response=No")
    personalized_message = personalized_message.replace("{{invitation_url}}", url)
    personalized_message = personalized_message.replace("{{contact_info}}", contact_info)

    # Record the user as invited first; a failure here propagates and no email is sent.
    client.add_members_to_group(reviewers_invited_id, [user])

    # send the email through openreview
    if invitation is not None:
        response = client.post_message(recruit_message_subj, [user], personalized_message, parentGroup=reviewers_invited_id, replyTo=replyTo, invitation=invitation, signature=signature)
    else:
        response = client.post_message(recruit_message_subj, [user], personalized_message, parentGroup=reviewers_invited_id, replyTo=replyTo)

    if verbose:
        print("Sent to the following: ", response)
        print(personalized_message)
[docs]
def recruit_user(client, user,
                 hash_seed,
                 recruitment_message_subject,
                 recruitment_message_content,
                 recruitment_invitation_id,
                 comittee_invited_id,
                 contact_email,
                 message_invitation,
                 message_signature,
                 name=None):
    """Send a recruitment email to a user with a personalized acceptance link.

    Generates an HMAC-based hash key for the user, builds a unique recruitment
    URL on https://openreview.net, personalizes the message template by replacing
    the ``{{fullname}}``, ``{{invitation_url}}``, and ``{{contact_info}}``
    placeholders, and sends the email via ``client.post_message()``.

    :param client: Client used to send the recruitment email.
    :type client: Client
    :param user: Email address or profile ID of the user to recruit.
    :type user: str
    :param hash_seed: Secret seed used to generate the HMAC hash key for the recruitment link.
    :type hash_seed: str
    :param recruitment_message_subject: Subject line for the recruitment email.
    :type recruitment_message_subject: str
    :param recruitment_message_content: Message body template. Supports ``{{fullname}}``, ``{{invitation_url}}``, and ``{{contact_info}}`` placeholders.
    :type recruitment_message_content: str
    :param recruitment_invitation_id: Invitation ID used in the recruitment URL.
    :type recruitment_invitation_id: str
    :param comittee_invited_id: Group ID for the invited committee group, used as parentGroup for the message. (Note: parameter name is a legacy misspelling of "committee".)
    :type comittee_invited_id: str
    :param contact_email: Contact email address substituted into ``{{contact_info}}`` and used as the replyTo address.
    :type contact_email: str
    :param message_invitation: Invitation ID for the message invitation.
    :type message_invitation: str
    :param message_signature: Signature used when posting the message.
    :type message_signature: str
    :param name: Full name of the user, used to replace ``{{fullname}}`` in the message. If falsy, the placeholder is left untouched.
    :type name: str, optional
    """
    hashkey = HMAC.new(hash_seed.encode('utf-8'), msg=user.encode('utf-8'), digestmod=SHA256).hexdigest()
    url = f'https://openreview.net/invitation?id={recruitment_invitation_id}&user={urlparse.quote(user)}&key={hashkey}'

    # Fill the template by literal replacement. No str.format() pass is applied, so
    # any other brace in the message is sent verbatim instead of raising.
    personalized_message = recruitment_message_content.replace("{{fullname}}", name) if name else recruitment_message_content
    personalized_message = personalized_message.replace("{{invitation_url}}", url)
    personalized_message = personalized_message.replace("{{contact_info}}", contact_email)

    client.post_message(recruitment_message_subject, [user], personalized_message, parentGroup=comittee_invited_id, replyTo=contact_email, invitation=message_invitation, signature=message_signature)
[docs]
def get_user_hash_key(user, hash_seed, invitation=None):
    """Build the key that identifies a user in a recruitment or authentication link.

    Without an ``invitation`` the key is the HMAC-SHA256 hex digest of ``user``
    keyed by ``hash_seed``. With an ``invitation`` the key is a JWT, signed with
    ``hash_seed`` (HS256), whose payload holds the user under ``group`` and the
    invitation under ``invitation``.

    :param user: Email address or group ID of the user.
    :type user: str
    :param hash_seed: Secret seed used for HMAC hashing or JWT signing.
    :type hash_seed: str
    :param invitation: Invitation ID. If provided, a JWT is returned instead of an HMAC hash.
    :type invitation: str, optional

    :return: JWT token string (if invitation is given) or HMAC-SHA256 hex digest.
    :rtype: str
    """
    if invitation is None:
        digest = HMAC.new(hash_seed.encode('utf-8'), msg=user.encode('utf-8'), digestmod=SHA256)
        return digest.hexdigest()

    claims = {"group": user, "invitation": invitation}
    return jwt.encode(claims, hash_seed, algorithm="HS256")
def get_user_parse(user, quote=True):
    """URL-quote a user identifier, or undo the quoting.

    :param user: The user identifier to convert.
    :type user: str
    :param quote: If truthy the value is percent-encoded; otherwise it is percent-decoded.
    :type quote: bool, optional

    :return: The quoted or unquoted user identifier.
    :rtype: str
    """
    convert = urlparse.quote if quote else urlparse.unquote
    return convert(user)
def create_hash_seed():
    """Generate a random 16-character seed made of ASCII letters and digits.

    :return: A string of 16 characters drawn from uppercase letters, lowercase letters and digits.
    :rtype: str
    """
    alphabet = string.ascii_letters + string.digits
    # NOTE(review): the seed is presumably used as the HMAC / JWT secret for
    # recruitment links, so it is drawn from the operating system's entropy source
    # (SystemRandom) instead of the predictable, seedable default generator.
    secure_random = random.SystemRandom()
    return ''.join(secure_random.choices(alphabet, k=16))
[docs]
def get_all_venues(client):
    """
    Lists all the venues, i.e. the members of the "host" group

    :param client: Client used to get all the venues
    :type client: Client

    :return: List of all the venues, each represented by its corresponding Group id
    :rtype: list[str]
    """
    host_group = client.get_group("host")
    return host_group.members
def info_function_builder(policy_function):
    """
    Wraps a profile-info policy function so that its ``domains`` are expanded into
    all of their subdomains and stripped of common public email domains.

    :param policy_function: Function called as ``policy_function(profile, n_years)``, or as ``policy_function(profile, n_years, submission_venueid)`` when it declares a ``submission_venueid`` argument. It must return a dict with a ``domains`` entry.
    :type policy_function: function

    :return: Function ``inner(profile, n_years=None, submission_venueid=None)`` returning the policy result with ``domains`` replaced by a list
    :rtype: function
    """
    # Public email providers: sharing one of these domains is not considered meaningful.
    public_email_domains = (
        'gmail.com', 'qq.com', '126.com', '163.com',
        'outlook.com', 'hotmail.com', 'yahoo.com', 'foxmail.com', 'aol.com', 'msn.com', 'ymail.com', 'googlemail.com', 'live.com',
    )

    def inner(profile, n_years=None, submission_venueid=None):
        accepts_venueid = 'submission_venueid' in inspect.getfullargspec(policy_function).args
        extra_args = (submission_venueid,) if accepts_venueid else ()
        result = policy_function(profile, n_years, *extra_args)

        expanded_domains = set()
        already_expanded = set()
        for domain in result['domains']:
            # Each distinct domain is expanded only once.
            if domain in already_expanded:
                continue
            already_expanded.add(domain)
            expanded_domains.update(openreview.tools.subdomains(domain))

        # Filter common domains
        expanded_domains.difference_update(public_email_domains)

        result['domains'] = list(expanded_domains)
        return result

    return inner
[docs]
def get_conflicts(author_profiles, user_profile, policy='default', n_years=None):
    """
    Finds conflicts between the passed user Profile and the author Profiles passed as arguments

    A conflict is any value shared between the user and the authors: the same
    profile id, a common domain, one side listed as a relation of the other, a
    common email or a common publication.

    :param author_profiles: List of Profiles for which an association is to be found
    :type author_profiles: list[Profile]
    :param user_profile: Profile for which the conflicts will be found
    :type user_profile: Profile
    :param policy: Either a function or a string. A function is used as the profile-info function and is called once per Profile. The strings ``'NeurIPS'`` and ``'Comprehensive'`` select the matching built-in policy; any other value selects the default policy.
    :type policy: str or function, optional
    :param n_years: Number of years to be considered for conflict detection.
    :type n_years: int, optional

    :return: List containing all the conflicts between the user Profile and the author Profiles
    :rtype: list[str]
    """
    # Resolve the policy to its raw profile-info function, then wrap it once.
    if callable(policy):
        policy_function = policy
    elif policy == 'NeurIPS':
        policy_function = get_neurips_profile_info
    elif policy == 'Comprehensive':
        policy_function = get_comprehensive_profile_info
    else:
        policy_function = get_profile_info
    info_function = info_function_builder(policy_function)

    # Pool the information of every author.
    author_ids, author_domains, author_emails = set(), set(), set()
    author_relations, author_publications = set(), set()
    for author_profile in author_profiles:
        info = info_function(author_profile, n_years)
        author_ids.add(info['id'])
        author_domains.update(info['domains'])
        author_emails.update(info['emails'])
        author_relations.update(info['relations'])
        author_publications.update(info['publications'])

    user_info = info_function(user_profile, n_years)
    user_id = user_info['id']

    overlaps = (
        author_ids.intersection({user_id}),
        author_domains.intersection(user_info['domains']),
        author_relations.intersection([user_id]),
        author_ids.intersection(user_info['relations']),
        author_emails.intersection(user_info['emails']),
        author_publications.intersection(user_info['publications']),
    )
    conflicts = set()
    for overlap in overlaps:
        conflicts.update(overlap)
    return list(conflicts)
[docs]
def get_profile_info(profile, n_years=None):
    """
    Gets the domains, relations and publications associated with a Profile

    Domains are the domain of every email address of the profile plus the
    institution domain of each history entry that has no end year or ended after
    the cut-off year. The returned ``emails`` set is always empty.

    :param profile: Profile from which all the information will be obtained
    :type profile: Profile
    :param n_years: Number of years to consider when getting the profile information. If falsy, the cut-off year is -1.
    :type n_years: int, optional

    :return: Dictionary with the keys ``id``, ``domains``, ``emails``, ``relations`` and ``publications``
    :rtype: dict
    """
    if n_years:
        cut_off_year = (datetime.datetime.now() - datetime.timedelta(days=365 * n_years)).year
    else:
        cut_off_year = -1

    domains = set()

    ## Emails section
    for email in profile.content['emails']:
        if '@' in email:
            domains.add(email.split('@')[1])
        else:
            print('Profile with invalid email:', profile.id, email)

    ## Institution section
    for history in profile.content.get('history', []):
        try:
            end = int(history.get('end', 0) or 0)
        except (TypeError, ValueError, OverflowError):
            # An end value that is not a number counts as "no end year".
            end = 0
        if not end or end > cut_off_year:
            domains.add(history.get('institution', {}).get('domain', ''))

    ## Relations section
    relations = filter_relations_by_year(profile.content.get('relations', []), cut_off_year)

    ## Publications section: get publications within last n years, default is all publications from previous years
    publications = filter_publications_by_year(profile.content.get('publications', []), cut_off_year)

    return {
        'id': profile.id,
        'domains': domains,
        'emails': set(),
        'relations': relations,
        'publications': publications
    }
[docs]
def get_neurips_profile_info(profile, n_years=None):
    """
    Gets the domains, relations and publications associated with a Profile, using the NeurIPS policy

    Domains are the institution domains of the history entries that are not
    internships and that have no end year or ended after the cut-off year. Email
    domains are used only when the history yields no domain at all. The returned
    ``emails`` set is always empty.

    :param profile: Profile from which all the information will be obtained
    :type profile: Profile
    :param n_years: Number of years to consider when getting the profile information. If falsy, the cut-off year is -1.
    :type n_years: int, optional

    :return: Dictionary with the keys ``id``, ``domains``, ``emails``, ``relations`` and ``publications``
    :rtype: dict
    """
    if n_years:
        cut_off_year = (datetime.datetime.now() - datetime.timedelta(days=365 * n_years)).year
    else:
        cut_off_year = -1

    domains = set()

    ## Institution section, get history within the last n years, excluding internships
    for h in profile.content.get('history', []):
        position = h.get('position')
        if position and not (isinstance(position, str) and 'intern' not in position.lower()):
            continue
        try:
            end = int(h.get('end', 0) or 0)
        except (TypeError, ValueError, OverflowError):
            # An end value that is not a number counts as "no end year".
            end = 0
        if not end or end > cut_off_year:
            domains.add(h.get('institution', {}).get('domain', ''))

    ## Relations section, get coauthor/coworker relations within the last n years + all the other relations
    relations = filter_relations_by_year(profile.content.get('relations', []), cut_off_year, ['Coauthor','Coworker'])

    ## if institution section is empty, add email domains
    if not domains:
        for email in profile.content['emails']:
            if '@' in email:
                domains.add(email.split('@')[1])
            else:
                print('Profile with invalid email:', profile.id, email)

    ## Publications section: get publications within last n years
    publications = filter_publications_by_year(profile.content.get('publications', []), cut_off_year)

    return {
        'id': profile.id,
        'domains': domains,
        'emails': set(),
        'relations': relations,
        'publications': publications
    }
[docs]
def get_comprehensive_profile_info(profile, n_years=None):
    """
    Gets the domains, relations and publications associated with a Profile, using the Comprehensive policy

    Domains are the institution domains of the history entries (internships
    included) that have no end year or ended after the cut-off year. Email domains
    are used only when the history yields no domain at all. The returned
    ``emails`` set is always empty.

    :param profile: Profile from which all the information will be obtained
    :type profile: Profile
    :param n_years: Number of years to consider when getting the profile information. If falsy, the cut-off year is -1.
    :type n_years: int, optional

    :return: Dictionary with the keys ``id``, ``domains``, ``emails``, ``relations`` and ``publications``
    :rtype: dict
    """
    if n_years:
        cut_off_year = (datetime.datetime.now() - datetime.timedelta(days=365 * n_years)).year
    else:
        cut_off_year = -1

    domains = set()

    ## Institution section, get history within the last n years
    for h in profile.content.get('history', []):
        position = h.get('position')
        # Entries whose position is set but is not a string are ignored.
        if position and not isinstance(position, str):
            continue
        try:
            end = int(h.get('end', 0) or 0)
        except (TypeError, ValueError, OverflowError):
            # An end value that is not a number counts as "no end year".
            end = 0
        if not end or end > cut_off_year:
            domains.add(h.get('institution', {}).get('domain', ''))

    ## Relations section: Coauthor/Coworker relations are filtered by year, every other relation is kept
    relations = filter_relations_by_year(profile.content.get('relations', []), cut_off_year, ['Coauthor','Coworker'])

    ## if institution section is empty, add email domains
    if not domains:
        for email in profile.content['emails']:
            if '@' in email:
                domains.add(email.split('@')[1])
            else:
                print('Profile with invalid email:', profile.id, email)

    ## Publications section: get publications within last n years
    publications = filter_publications_by_year(profile.content.get('publications', []), cut_off_year)

    return {
        'id': profile.id,
        'domains': domains,
        'emails': set(),
        'relations': relations,
        'publications': publications
    }
[docs]
def get_current_submissions_profile_info(profile, n_years=None, submission_venueid=None):
    """
    Gets the profile information, keeping only the publications submitted to the current venue

    Domains are the institution domains of the history entries that are not
    internships and that have no end year or ended after the cut-off year.
    Publications are the ids of the profile publications whose ``venueid`` content
    value equals ``submission_venueid``; they are not filtered by year.

    :param profile: Profile from which all the information will be obtained
    :type profile: Profile
    :param n_years: Number of years to consider for history and relations. If None, the cut-off year is -1.
    :type n_years: int, optional
    :param submission_venueid: venue id of the submissions we want to obtain
    :type submission_venueid: str, optional

    :return: Dictionary with the keys ``id``, ``domains``, ``emails`` (always empty), ``relations`` and ``publications``
    :rtype: dict
    """
    if n_years is not None:
        cut_off_year = (datetime.datetime.now() - datetime.timedelta(days=365 * n_years)).year
    else:
        cut_off_year = -1

    domains = set()

    ## Institution section, get history within the last n years, excluding internships
    for h in profile.content.get('history', []):
        position = h.get('position')
        if position and not (isinstance(position, str) and 'intern' not in position.lower()):
            continue
        try:
            end = int(h.get('end', 0) or 0)
        except (TypeError, ValueError, OverflowError):
            # An end value that is not a number counts as "no end year".
            end = 0
        if not end or end > cut_off_year:
            domains.add(h.get('institution', {}).get('domain', ''))

    ## Relations section, get coauthor/coworker relations within the last n years + all the other relations
    relations = filter_relations_by_year(profile.content.get('relations', []), cut_off_year, ['Coauthor','Coworker'])

    ## Get publications
    publications = set()
    for publication in profile.content.get('publications', []):
        venueid = publication.content.get('venueid')
        if isinstance(venueid, dict) and venueid['value'] == submission_venueid:
            publications.add(publication.id)

    return {
        'id': profile.id,
        'domains': domains,
        'emails': set(),
        'relations': relations,
        'publications': publications
    }
def filter_publications_by_year(publications, cut_off_year):
    """
    Selects the publications whose year is strictly greater than the cut-off year

    The year of a publication is resolved in this order:

    1. the year of its publication date (``pdate``)
    2. the ``year`` field of its content (a string or a ``{'value': ...}`` dict), ignored when it is not a number or lies in the future
    3. the year of its creation date (``cdate``, or ``tcdate`` when ``cdate`` is not set)

    Publications whose year cannot be resolved are left out.

    :param publications: Publications to filter; each one exposes ``id``, ``pdate``, ``cdate``, ``tcdate`` and ``content``
    :type publications: list[Note]
    :param cut_off_year: Only publications from a later year are kept
    :type cut_off_year: int

    :return: Ids of the publications that passed the filter
    :rtype: set[str]
    """
    def extract_year(publication_id, timestamp):
        # The timestamp is expressed in milliseconds; a missing or out-of-range value yields None.
        try:
            return int(datetime.datetime.fromtimestamp(timestamp/1000).year)
        except (TypeError, ValueError, OverflowError, OSError):
            print('Error extracting the date for publication: ', publication_id)
            return None

    filtered_publications = set()
    current_year = datetime.datetime.now().year

    for publication in publications:
        year = None

        if publication.pdate:
            year = extract_year(publication.id, publication.pdate)

        if not year and 'year' in publication.content:
            content_year = publication.content['year']
            unformatted_year = None
            if isinstance(content_year, dict) and 'value' in content_year:
                unformatted_year = content_year['value']
            elif isinstance(content_year, str):
                unformatted_year = content_year
            try:
                converted_year = int(unformatted_year)
            except (TypeError, ValueError, OverflowError):
                converted_year = None
            # A year in the future is not trusted; fall back to the creation date instead.
            if converted_year is not None and converted_year <= current_year:
                year = converted_year

        if not year:
            year = extract_year(publication.id, publication.cdate if publication.cdate else publication.tcdate)

        if year and year > cut_off_year:
            filtered_publications.add(publication.id)

    return filtered_publications
def filter_relations_by_year(relations, cut_off_year, only_relations=None):
    """
    Selects the relations that are still relevant for the given cut-off year

    Each relation is identified by its ``profile_id``, falling back to ``username``
    and then ``email``; relations without an identifier are skipped. A relation is
    kept when it has no numeric ``end`` year or when it ended after the cut-off
    year. When ``only_relations`` is given, the year filter applies only to those
    relation types and every relation of another type is kept.

    :param relations: Relations as dictionaries with the optional keys ``profile_id``, ``username``, ``email``, ``relation`` and ``end``
    :type relations: list[dict]
    :param cut_off_year: Year-filtered relations are kept only if they ended after this year
    :type cut_off_year: int
    :param only_relations: Relation types the year filter applies to. If None, it applies to all of them.
    :type only_relations: list[str], optional

    :return: Identifiers of the relations that passed the filter
    :rtype: set[str]
    """
    filtered_relations = set()
    for r in relations:
        relation_id = r.get('profile_id', r.get('username', r.get('email')))
        if not relation_id:
            continue

        try:
            end = int(r.get('end'))
        except (TypeError, ValueError, OverflowError):
            # A missing or non numeric end (e.g. "Present") means the relation is still ongoing.
            end = None

        subject_to_year_filter = only_relations is None or r.get('relation', '') in only_relations
        if not subject_to_year_filter or end is None or end > cut_off_year:
            filtered_relations.add(relation_id)

    return filtered_relations
[docs]
def post_bulk_edges(client, edges, batch_size = 50000):
    """Post a large list of Edges in batches with a progress bar.

    Splits the edge list into chunks of ``batch_size`` and posts each chunk
    via ``client.post_edges()``. Returns all posted Edge objects.

    :param client: Client used to post the Edges.
    :type client: Client
    :param edges: List of Edge objects to post.
    :type edges: list[Edge]
    :param batch_size: Number of edges per batch. Default: 50000.
    :type batch_size: int, optional

    :return: List of all posted Edge objects across all batches.
    :rtype: list[Edge]
    """
    posted = []
    # tqdm takes the number of steps from the length of the range, so the progress
    # bar total always equals the real number of batches (also when the number of
    # edges is zero or an exact multiple of the batch size).
    for start in tqdm(range(0, len(edges), batch_size)):
        posted.extend(client.post_edges(edges[start:start + batch_size]))
    return posted
[docs]
def post_bulk_tags(client, tags, batch_size = 50000):
    """Post a large list of Tags in batches with a progress bar.

    Splits the tag list into chunks of ``batch_size`` and posts each chunk
    via ``client.post_tags()``. Returns all posted Tag objects.

    :param client: Client used to post the Tags.
    :type client: Client
    :param tags: List of Tag objects to post.
    :type tags: list[Tag]
    :param batch_size: Number of tags per batch. Default: 50000.
    :type batch_size: int, optional

    :return: List of all posted Tag objects across all batches.
    :rtype: list[Tag]
    """
    posted = []
    # tqdm takes the number of steps from the length of the range, so the progress
    # bar total always equals the real number of batches (also when the number of
    # tags is zero or an exact multiple of the batch size).
    for start in tqdm(range(0, len(tags), batch_size)):
        posted.extend(client.post_tags(tags[start:start + batch_size]))
    return posted
[docs]
def overwrite_pdf(client, note_id, file_path):
    """
    Overwrite the pdf of all the references of a note with a new pdf file.
    If the note has an original note, the references of the original are updated instead.

    The file is uploaded once, using the invitation of the note whose references
    are updated, and only references that already have a ``pdf`` in their content
    are re-posted.

    :param client: Client used to read the note, upload the file and post the references
    :type client: Client
    :param note_id: ID of the note whose references are updated
    :type note_id: str
    :param file_path: Path of the new pdf file
    :type file_path: str

    :return: The references returned by ``client.post_note``, empty when the note has no references
    :rtype: list[Note]
    """
    note = client.get_note(id=note_id)
    target_note = client.get_note(id=note.original) if note.original else note

    references = client.get_references(referent=target_note.id)
    target_invitation = target_note.invitation
    if not references:
        return []

    pdf_url = client.put_attachment(file_path, target_invitation, 'pdf')

    reposted = []
    for reference in references:
        if 'pdf' not in reference.content:
            continue
        reference.content['pdf'] = pdf_url
        reposted.append(client.post_note(reference))
    return reposted
def pretty_id(group_id):
    """
    Turns a group or profile id into a human readable string

    - a falsy id gives an empty string
    - a profile id (``~First_Last1``) gives the name without tilde, underscores and digits
    - the reserved ids ``everyone``, ``(anonymous)``, ``(guest)`` and ``~`` are returned as they are
    - any other id is split on ``/``; for each token everything after the first dot
      is dropped, dashes are removed and underscores become spaces. Tokens that end
      up made only of lowercase letters, or empty, are left out.

    :param group_id: Group or profile id
    :type group_id: str

    :return: The readable version of the id
    :rtype: str

    >>> pretty_id('ICLR.cc/2022/Conference/Program_Chairs')
    'ICLR 2022 Conference Program Chairs'
    """
    if not group_id:
        return ''

    # A bare '~' is not a profile id: it must reach the reserved ids check below.
    if group_id.startswith('~') and len(group_id) > 1:
        return re.sub('[0-9]+', '', group_id.replace('~', '').replace('_', ' '))

    if group_id in ['everyone', '(anonymous)', '(guest)', '~']:
        return group_id

    transformed_tokens = []
    for token in group_id.split('/'):
        transformed_token = re.sub(r'\..+', '', token).replace('-', '').replace('_', ' ')
        letters_only = re.sub(r'\d|\W', '', transformed_token)
        has_no_ascii = not re.search(r'[a-zA-Z0-9]', transformed_token)
        if letters_only != transformed_token.lower() or (has_no_ascii and transformed_token):
            transformed_tokens.append(transformed_token)

    return ' '.join(transformed_tokens)
def export_committee(client, committee_id, file_name):
    """
    Writes the members of a committee group to a CSV file

    Each row holds the preferred email and the preferred (pretty) name of one
    member profile.

    :param client: Client used to get the group and the profiles of its members
    :type client: Client
    :param committee_id: ID of the committee group
    :type committee_id: str
    :param file_name: Path of the CSV file to write; an existing file is overwritten
    :type file_name: str
    """
    members = client.get_group(committee_id).members
    profiles = get_profiles(client, members)
    # newline='' lets the csv module control the line endings itself; without it
    # every row is followed by an extra blank line on platforms that translate '\n'.
    with open(file_name, 'w', newline='') as outfile:
        csvwriter = csv.writer(outfile, delimiter=',')
        for profile in tqdm(profiles):
            csvwriter.writerow([profile.get_preferred_email(), profile.get_preferred_name(pretty=True)])
def get_own_reviews(client):
    """
    Collects links to the public reviews written by the user of the client, in both API versions

    A review is listed only when both the review and its submission are readable by
    ``everyone``. In API v1 a review is a note whose invitation contains
    ``Official_Review``. In API v2 it is a note with an invitation containing
    ``/-/<review_name>``, where ``review_name`` is read from the content of the
    note's domain group (TMLR is hardcoded to ``/-/Review``).

    :param client: Client whose token and profile are used to query both APIs
    :type client: Client

    :return: One dictionary per review with the keys ``submission_title``, ``submission_link`` and ``review_link``
    :rtype: list[dict]
    """
    baseurl_v1, baseurl_v2 = get_base_urls(client)
    client_v1 = openreview.Client(baseurl=baseurl_v1, token=client.token)
    client_v2 = openreview.api.OpenReviewClient(baseurl=baseurl_v2, token=client.token)

    # Tuples of (submission, review, API version)
    submissions_and_official_reviews = []

    # Get all the reviews from v1
    for note in client_v1.get_all_notes(tauthor=True):
        # Make sure that the Official Review is public
        if 'Official_Review' not in note.invitation or 'everyone' not in note.readers:
            continue

        # Make sure that the submission is public
        submission = client_v1.get_note(note.forum)
        if 'everyone' not in submission.readers:
            continue

        submissions_and_official_reviews.append((submission, note, 1))

    # Get all the reviews from v2
    profile = getattr(client, 'profile')
    profile_id = getattr(profile, 'id') if profile else 'Guest'
    if profile_id == 'Guest':
        notes_v2 = []
    else:
        notes_v2 = client_v2.get_all_notes(signature=profile_id, transitive_members=True)

    # TMLR was created before the invitation names were added to the
    # group content, so we need to hardcode it
    domain_to_reviewer_invitation_suffix = {
        'TMLR': '/-/Review'
    }

    for note in notes_v2:
        domain = note.domain

        # Get review invitation name from domain group content
        if domain_to_reviewer_invitation_suffix.get(domain) is None:
            group = client_v2.get_group(domain)
            # A group without content, or with empty content, has no review name.
            group_content = getattr(group, 'content', None) or {}
            review_name = group_content.get('review_name', {}).get('value', None)
            if review_name is None:
                continue
            domain_to_reviewer_invitation_suffix[domain] = '/-/' + review_name

        reviewer_invitation_suffix = domain_to_reviewer_invitation_suffix[domain]

        # Make sure that the note is an Official Review and that it is public
        is_official_review = any(reviewer_invitation_suffix in invitation for invitation in note.invitations)
        if not is_official_review or 'everyone' not in note.readers:
            continue

        # Make sure that the submission is public
        submission = client_v2.get_note(note.forum)
        if 'everyone' not in submission.readers:
            continue

        submissions_and_official_reviews.append((submission, note, 2))

    links = []
    for submission, official_review, version in submissions_and_official_reviews:
        submission_link = f'https://openreview.net/forum?id={submission.id}'
        review_link = f'https://openreview.net/forum?id={submission.id}&noteId={official_review.id}'

        # API v1 stores the title as a plain value, API v2 wraps it in {'value': ...}
        if version == 1:
            submission_title = submission.content.get('title', '')
        else:
            submission_title = submission.content.get('title', {}).get('value', '')

        links.append({
            'submission_title': submission_title,
            'submission_link': submission_link,
            'review_link': review_link
        })

    return links
def get_base_urls(client):
    """
    Returns the API v1 and API v2 base URLs of the environment the client points to

    :param client: client whose ``baseurl`` attribute is used to identify the environment

    :return: two-element list with the API v1 URL followed by the API v2 URL
    :rtype: list
    """
    urls_by_environment = {
        'dev': [DEV_API_V1, DEV_API_V2],
        'prod': [PROD_API_V1, PROD_API_V2],
    }
    environment = _identify_environment(client.baseurl)
    # Anything that is neither dev nor prod falls back to the local URLs
    return urls_by_environment.get(environment, [LOCAL_API_V1, LOCAL_API_V2])
def get_site_url(client):
    """
    Returns the web site URL of the environment the client points to

    :param client: client whose ``baseurl`` attribute is used to identify the environment

    :return: the dev, prod or local site URL
    :rtype: string
    """
    site_by_environment = {
        'dev': DEV_SITE,
        'prod': PROD_SITE,
    }
    environment = _identify_environment(client.baseurl)
    # Anything that is neither dev nor prod falls back to the local site
    return site_by_environment.get(environment, LOCAL_SITE)
def resend_emails(client, request_id, groups):
    """
    Posts a new message request that reuses the subject, message and optional
    settings of an existing message request

    :param client: client providing ``get_message_requests`` and ``post_message_request``
    :param request_id: id of the message request to copy
    :param groups: value forwarded as the second positional argument of
        ``post_message_request`` (presumably the recipient groups -- confirm against the client)

    :raises AssertionError: when the lookup by id does not return exactly one message request
    """
    found_requests = client.get_message_requests(id=request_id)
    assert len(found_requests) == 1, 'Request not found'
    original_request = found_requests[0]

    # Sender details are nested under 'sender'; the key is always sent, even when empty
    optional_params = {
        'sender': {
            key: original_request[key]
            for key in ('fromName', 'fromEmail')
            if key in original_request
        }
    }

    # Remaining optional settings are forwarded only when the original request had them
    for key in ('signature', 'invitation', 'ignoreRecipients', 'replyTo', 'parentGroup'):
        if key in original_request:
            optional_params[key] = original_request[key]

    client.post_message_request(
        original_request['subject'],
        groups,
        original_request['message'],
        **optional_params
    )
def get_invitation_source(invitation, domain):
    """
    Builds the source filter of an invitation, merging the ``source`` content field
    with the deprecated ``reply_to`` and ``source_submissions_query`` content fields

    The returned dictionary is a copy: the invitation content is never modified.

    :param invitation: invitation whose ``content`` holds the source configuration
    :param domain: venue group; its ``id`` and ``content`` provide the venue ids and
        the review, meta review and rebuttal invitation names

    :return: source dictionary that may contain ``venueid``, ``with_decision_accept``,
        ``readers``, ``content`` and ``reply_to``; empty when the invitation has no content
    :rtype: dict
    """
    invitation_content = invitation.content
    if not invitation_content:
        return {}

    submission_venue_id = domain.content.get('submission_venue_id', {}).get('value', None)
    venue_id = domain.id
    review_name = domain.content.get('review_name', {}).get('value', None)
    meta_review_name = domain.content.get('meta_review_name', {}).get('value', None)
    rebuttal_name = domain.content.get('rebuttal_name', {}).get('value', None)

    source = invitation_content.get(
        'source', { 'value': { 'venueid': submission_venue_id } }
    ).get('value', { 'venueid': submission_venue_id })

    ## Deprecated, use source as dictionary
    if isinstance(source, str):
        deprecated_sources = {
            'all_submissions': { 'venueid': submission_venue_id },
            'accepted_submissions': { 'venueid': [venue_id, submission_venue_id], 'with_decision_accept': True },
            'public_submissions': { 'venueid': submission_venue_id, 'readers': ['everyone'] },
            'flagged_for_ethics_review': { 'venueid': submission_venue_id, 'content': { 'flagged_for_ethics_review': True } }
        }
        # Unknown names are left untouched, as before
        source = deprecated_sources.get(source, source)
    ##

    # Work on a copy: the dictionary may be the very object stored in the invitation
    # content, and the deprecated fields below are written into it
    if isinstance(source, dict):
        source = dict(source)
        if isinstance(source.get('content'), dict):
            source['content'] = dict(source['content'])

    ## Deprecated, use source instead
    reply_to = invitation_content.get('reply_to', {}).get('value', 'forum')
    if isinstance(reply_to, str):
        named_reply_to = {
            'reviews': review_name,
            'metareviews': meta_review_name,
            'rebuttals': rebuttal_name
        }
        if reply_to in named_reply_to:
            source['reply_to'] = named_reply_to[reply_to]
        elif reply_to not in ('forum', 'withForum'):
            # Any other value is taken as the invitation name itself
            source['reply_to'] = reply_to
    ##

    ## Deprecated, use source instead
    source_submissions_query = invitation_content.get('source_submissions_query', {}).get('value', {})
    for key, value in source_submissions_query.items():
        if 'content' not in source:
            source['content'] = {}
        source['content'][key] = value
    ##

    return source
[docs]
def should_match_invitation_source(client, invitation, submission, note=None, domain=None):
    """
    Checks if the invitation source matches the submission and note.

    :param client: client used to fetch the domain group when ``domain`` is not given
        and the decision notes when the submission details carry no replies
    :param invitation: invitation whose source and ``edit`` content keys are inspected
    :param submission: submission note to test against the source
    :param note: optional reply; it is required when the source has a ``reply_to``
        and must be absent otherwise
    :param domain: optional venue group, passed to avoid fetching it again

    :return: True when the submission (and reply, if any) satisfies every condition of the source
    :rtype: bool
    """
    if domain is None:
        domain = client.get_group(submission.domain)

    source = get_invitation_source(invitation, domain)
    if not source:
        return False

    # The source venueid is either a single id or a list of ids. Wrap a single id so
    # that `in` is an exact membership test rather than a substring test
    # (otherwise 'V' would match 'V/Submission')
    allowed_venue_ids = source.get('venueid', [])
    if isinstance(allowed_venue_ids, str):
        allowed_venue_ids = [allowed_venue_ids]
    if submission.content['venueid']['value'] not in allowed_venue_ids:
        return False

    if 'reply_to' in source:
        # A reply is required and its first invitation must end with the reply_to name
        if not note or not note.invitations[0].endswith(f'/-/{source.get("reply_to")}'):
            return False
    elif note:
        # The source targets the forum itself, not a reply
        return False

    if 'readers' in source and not set(source['readers']).issubset(set(submission.readers)):
        return False

    if 'content' in source:
        for key, value in source.get('content', {}).items():
            if value != submission.content.get(key, {}).get('value'):
                return False

    if 'with_decision_accept' in source:
        with_decision_accept = source.get('with_decision_accept')
        print('checking decision accept for submission', submission.id, 'with_decision_accept', with_decision_accept)
        decision_invitation_id = f'{domain.id}/{domain.content["submission_name"]["value"]}{submission.number}/-/{domain.content.get("decision_name", {}).get("value", "Decision")}'
        # details may be None when the submission was fetched without them;
        # in that case fall back to querying the decision notes
        details = submission.details or {}
        replies = details.get('replies', details.get('directReplies'))
        if replies is None:
            decision_notes = client.get_notes(forum=submission.id, invitation=decision_invitation_id)
        else:
            decision_notes = [openreview.api.Note.from_json(reply) for reply in replies if reply['invitations'][0] == decision_invitation_id]
        if not decision_notes:
            return False
        accept_options = domain.content.get('accept_decision_options', {}).get('value')
        decision_value = decision_notes[0].content[domain.content.get('decision_field_name', {}).get('value', 'decision')]['value']
        if is_accept_decision(decision_value, accept_options) != with_decision_accept:
            return False

    # Finally the invitation template must take the expected content parameters
    content_keys = invitation.edit.get('content', {}).keys()
    if 'withdrawalId' in content_keys:
        return False
    if 'deskRejectionId' in content_keys:
        return False
    if 'noteReaders' in content_keys:
        return False
    if content_keys and 'noteId' not in content_keys:
        return False
    if content_keys and 'noteNumber' not in content_keys:
        return False
    if note and 'replyto' not in content_keys:
        return False

    return True
def is_forum_invitation(invitation):
    """
    Checks if the invitation edit takes ``noteId`` and ``noteNumber`` content
    parameters but no ``replyto`` parameter

    :param invitation: invitation whose ``edit`` dictionary is inspected

    :return: True when both required keys are present and ``replyto`` is absent
    :rtype: bool
    """
    content_keys = invitation.edit.get('content', {}).keys()
    has_required_keys = all(key in content_keys for key in ('noteId', 'noteNumber'))
    return has_required_keys and 'replyto' not in content_keys
def create_replyto_invitations(client, submission, note):
    """
    Posts an invitation edit for every active venue invitation template whose
    source matches the given reply

    :param client: client used to list the invitations and post the invitation edits
    :param submission: submission note the reply belongs to
    :param note: reply note the new invitations are created for
    """
    venue_invitations = [i for i in client.get_all_invitations(prefix=note.domain + '/-/', type='invitation', domain=note.domain) if i.is_active()]
    if not venue_invitations:
        return

    # The domain group is the same for every invitation: fetch it once instead of
    # letting should_match_invitation_source fetch it on each iteration
    domain = client.get_group(submission.domain)

    for invitation in venue_invitations:
        print('processing invitation: ', invitation.id)
        if not should_match_invitation_source(client, invitation, submission, note, domain=domain):
            print('skipping invitation: ', invitation.id, ' - does not match source')
            continue

        print('create invitation: ', invitation.id)
        content = {
            'noteId': { 'value': note.forum },
            'noteNumber': { 'value': submission.number },
            'replyto': { 'value': note.id }
        }
        # Optional parameters are filled in only when the template declares them
        content_keys = invitation.edit.get('content', {}).keys()
        if 'replytoSignatures' in content_keys:
            content['replytoSignatures'] = { 'value': note.signatures[0] }
        if 'replyNumber' in content_keys:
            content['replyNumber'] = { 'value': note.number }
        if 'invitationPrefix' in content_keys:
            content['invitationPrefix'] = { 'value': note.invitations[0].replace('/-/', '/') + str(note.number) }
        if 'replytoReplytoSignatures' in content_keys:
            content['replytoReplytoSignatures'] = { 'value': client.get_note(note.replyto).signatures[0] }
        client.post_invitation_edit(invitations=invitation.id,
            content=content,
            invitation=openreview.api.Invitation()
        )
def create_forum_invitations(client, submission):
    """
    Posts an invitation edit for every active venue invitation template (with date
    processes) whose source matches the submission. For forum templates that do
    not match, the invitations already created from them for this forum are
    deleted by setting their ``ddate``

    :param client: client used to list invitations and post the invitation edits
    :param submission: submission note the forum invitations are created for
    """
    invitation_invitations = [i for i in client.get_all_invitations(prefix=submission.domain + '/-/', type='invitation', domain=submission.domain) if i.is_active() and i.date_processes]
    if not invitation_invitations:
        return

    # The domain group is the same for every invitation: fetch it once instead of
    # letting should_match_invitation_source fetch it on each iteration
    domain = client.get_group(submission.domain)

    for invitation in invitation_invitations:
        print('processing invitation: ', invitation.id)
        if should_match_invitation_source(client, invitation, submission, domain=domain):
            print('create invitation: ', invitation.id)
            client.post_invitation_edit(invitations=invitation.id,
                content={
                    'noteId': { 'value': submission.id },
                    'noteNumber': { 'value': submission.number }
                },
                invitation=openreview.api.Invitation()
            )
            continue

        print('skipping invitation: ', invitation.id, ' - does not match source')
        if not is_forum_invitation(invitation):
            continue

        # The submission no longer matches: delete the invitations previously
        # created from this template for the forum
        forum_invitations = client.get_invitations(replyForum=submission.id, invitation=invitation.id)
        for forum_invitation in forum_invitations:
            print('delete invitation: ', forum_invitation.id)
            client.post_invitation_edit(
                invitations=f'{submission.domain}/-/Edit',
                signatures=[submission.domain],
                invitation=openreview.api.Invitation(id=forum_invitation.id,
                    ddate=openreview.tools.datetime_millis(datetime.datetime.now())
                )
            )
def singularize(word):
    """
    Returns a singular form of the word using a plain suffix heuristic

    The first matching rule wins: ``ies`` becomes ``y``, otherwise a trailing
    ``es`` is dropped, otherwise a trailing ``s`` is dropped. The check is
    case sensitive and knows nothing about English exceptions, so a word such
    as ``Responses`` becomes ``Respons``.

    :param word: word to convert
    :type word: string

    :return: the word with the matching plural suffix replaced, or the word unchanged
    :rtype: string
    """
    # Ordered from the longest suffix to the shortest
    suffix_rules = (('ies', 'y'), ('es', ''), ('s', ''))
    for suffix, replacement in suffix_rules:
        if word.endswith(suffix):
            return word[:-len(suffix)] + replacement
    return word
[docs]
def percentile(data, percent):
    """Return the percentile value from *data* using linear interpolation,
    matching the behaviour of numpy.percentile with the default 'linear' method.

    :param data: non-empty iterable of numbers; it does not need to be sorted
    :param percent: int or float in [0, 100]

    :return: the interpolated value; an element of *data* when *data* has a single
        element or when *percent* is 100
    :raises ValueError: when *data* is empty or None, or when *percent* is outside [0, 100]
    """
    # Sort before checking for emptiness so one-shot iterables are handled too
    sorted_data = sorted(data) if data is not None else []
    n = len(sorted_data)
    if n == 0:
        raise ValueError("data must be non-empty")
    # Written as a negated chained comparison so NaN is rejected as well.
    # Without this check a negative percent would index from the end of the list.
    if not 0 <= percent <= 100:
        raise ValueError("percent must be in the range [0, 100]")
    if n == 1:
        return sorted_data[0]

    # NumPy linear interpolation: index = percent/100 * (n - 1)
    idx = percent / 100.0 * (n - 1)
    lo = int(idx)
    hi = lo + 1
    if hi >= n:
        # Only reachable when percent is 100: there is no upper neighbour
        return sorted_data[-1]
    frac = idx - lo
    return sorted_data[lo] + frac * (sorted_data[hi] - sorted_data[lo])