Source code for openreview.tools

#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import inspect

import json
import os

import openreview
import re
import datetime
import csv
from pylatexenc.latexencode import utf8tolatex, unicode_to_latex, UnicodeToLatexConversionRule, UnicodeToLatexEncoder, RULE_REGEX
import unicodedata
from Crypto.Hash import HMAC, SHA256
from multiprocessing import Pool, cpu_count
from tqdm import tqdm
import tld
import urllib.parse as urlparse
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import random
import string
from deprecated.sphinx import deprecated
import jwt

# --- URL Constants ---
# Production deployment: API v1, API v2 and the web site.
PROD_API_V1 = 'https://api.openreview.net'
PROD_API_V2 = 'https://api2.openreview.net'
PROD_SITE   = 'https://openreview.net'

# Development deployment: API v1, API v2 and the web site.
DEV_API_V1 = 'https://api.dev.openreview.net'
DEV_API_V2 = 'https://api2.dev.openreview.net'
DEV_SITE   = 'https://dev.openreview.net'

# Local deployment: each URL is read from the named environment variable and
# falls back to the localhost default when the variable is not set.
LOCAL_API_V1 = os.environ.get('OPENREVIEW_API_V1_URL', 'http://localhost:3000')
LOCAL_API_V2 = os.environ.get('OPENREVIEW_API_V2_URL', 'http://localhost:3001')
LOCAL_SITE   = os.environ.get('OPENREVIEW_WEB_URL', 'http://localhost:3030')

# Remote-only lists (exclude localhost) used by client guards
V1_REMOTE_URLS = [PROD_API_V1, DEV_API_V1]
V2_REMOTE_URLS = [PROD_API_V2, DEV_API_V2]

def _identify_environment(baseurl):
    """Classify ``baseurl`` as ``'dev'``, ``'prod'`` or ``'local'``.

    The dev API URLs are tried first, then the prod API URLs; a ``baseurl``
    that contains none of them is reported as ``'local'``.
    """
    known_environments = (
        ('dev', (DEV_API_V1, DEV_API_V2)),
        ('prod', (PROD_API_V1, PROD_API_V2)),
    )
    for environment, api_urls in known_environments:
        for api_url in api_urls:
            # Substring match, so a baseurl with a trailing path still qualifies
            if api_url in baseurl:
                return environment
    return 'local'

def decision_to_venue(venue_id, decision_option, accept_options=None):
    """
    Returns the venue for a submission based on its decision

    Non-accepted submissions get ``Submitted to <venue_id>``. Accepted ones get
    the venue id, followed by whatever is left of the decision once the word
    "Accept", parentheses and every non-word character have been removed.

    :param venue_id: venue's short name (i.e., ICLR 2022)
    :type venue_id: string
    :param decision_option: paper decision (i.e., Accept, Reject)
    :type decision_option: string
    :param accept_options: accept decisions (i.e., [ Accept (Best Paper), Invite to Archive ])
    :type accept_options: list

    :return: venue string for the submission
    :rtype: string
    """
    if not is_accept_decision(decision_option, accept_options):
        return f'Submitted to {venue_id}'

    qualifier = decision_option
    if 'Accept' in qualifier:
        qualifier = qualifier.replace('Accept', '')
    # \W also matches whitespace, so multi-word qualifiers are squeezed together
    qualifier = re.sub(r'[()\W]+', '', qualifier)

    if not qualifier:
        return venue_id
    return venue_id + ' ' + qualifier.strip()
def is_accept_decision(decision, accept_options=None):
    """
    Checks if decision is an accept decision

    When ``accept_options`` is given and non-empty, the decision must be one of
    them. Otherwise any decision containing the word "Accept" counts.

    :param decision: paper decision (i.e., Accept, Reject)
    :type decision: string
    :param accept_options: accept decisions (i.e., [ Accept (Best Paper), Invite to Archive ])
    :type accept_options: list

    :return: True if the decision is an accept decision, False otherwise
    :rtype: bool
    """
    if accept_options:
        return decision in accept_options
    return 'Accept' in decision
def run_once(f):
    """
    Decorator to run a function only once and return its output for any subsequent call to the function without running it again

    The cached value is exposed as ``wrapper.to_return`` and the state flag as
    ``wrapper.has_run``. If the wrapped function raises, nothing is cached and
    the next call runs it again.

    :param f: function to wrap
    :type f: function

    :return: wrapper that calls ``f`` at most once successfully
    :rtype: function
    """
    import functools  # function-scope import keeps this block self-contained

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        if not wrapper.has_run:
            # Store the result BEFORE flipping the flag: if f raises, the flag
            # stays False instead of leaving the wrapper without `to_return`.
            wrapper.to_return = f(*args, **kwargs)
            wrapper.has_run = True
        return wrapper.to_return

    wrapper.has_run = False
    return wrapper
def format_params(params):
    """
    Recursively replaces every boolean inside ``params`` with its JSON text

    Dictionaries and lists are rebuilt with their values formatted; booleans
    become ``'true'`` / ``'false'``; any other value is returned unchanged.

    :param params: value to format
    :type params: dict, list, bool or any other value

    :return: formatted copy of the dicts/lists, or the value itself
    """
    if isinstance(params, bool):
        return json.dumps(params)
    if isinstance(params, dict):
        return {key: format_params(value) for key, value in params.items()}
    if isinstance(params, list):
        return [format_params(value) for value in params]
    return params
def concurrent_requests(request_func, params, desc='Gathering Responses', max_workers=None):
    """
    Returns a list of results given for each request_func param execution. It shows a progress bar to know the progress of the task.

    Results are returned in the same order as ``params``. If any call raises,
    the exception propagates to the caller and the progress bar is still closed.

    :param request_func: a function to execute for each value of the list.
    :type request_func: function
    :param params: a list of values to be executed by request_func.
    :type params: list
    :param desc: description to show in the progress bar.
    :type desc: str
    :param max_workers: number of workers to use in the ThreadPoolExecutor, default value is min(16, cpu_count() * 5).
    :type max_workers: int

    :return: A list of results given for each func value execution
    :rtype: list
    """
    if max_workers is None:
        max_workers = min(16, (cpu_count() or 1) * 5)

    results = []
    # The context manager closes the bar even when a request raises
    with tqdm(total=len(params), desc=desc) as progress:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(request_func, param) for param in params]
            for future in futures:
                # result() blocks until the request is done and re-raises its exception
                results.append(future.result())
                # Tick only once the response has actually been gathered
                progress.update(1)
    return results
def get_profile(client, value, with_publications=False):
    """
    Get a single profile (a note) by id, if available

    :param client: User that will retrieve the profile
    :type client: Client
    :param value: e-mail or id of the profile
    :type value: str
    :param with_publications: if True, the API v1 notes whose ``authorids`` content matches the profile id are stored under ``profile.content['publications']``
    :type with_publications: bool, optional

    :return: Profile with that matches the value passed as parameter, or None when the error is "Profile Not Found"
    :rtype: Profile
    """
    found = None
    try:
        found = client.get_profile(value)
        if with_publications:
            # Only API v1 publications are collected here
            v1_url, _ = get_base_urls(client)
            v1_client = openreview.Client(baseurl=v1_url, token=client.token)
            found.content['publications'] = list(
                iterget_notes(v1_client, content={'authorids': found.id})
            )
    except openreview.OpenReviewException as error:
        # throw an error if it is something other than "not found"
        if 'Profile Not Found' not in error.args[0]:
            raise error
    return found
def get_profiles(client, ids_or_emails, with_publications=False, with_relations=False, with_preferred_emails=None, as_dict=False):
    '''
    Helper function that repeatedly queries for profiles, given IDs and emails.
    Useful for getting more Profiles than the server will return by default (1000)

    Entries containing ``~`` are searched as profile ids; every other entry is
    searched as a confirmed e-mail. An e-mail that matches no profile gets a
    placeholder Profile whose id is the e-mail itself and whose names list is empty.

    :param client: Client used to search the profiles
    :type client: Client
    :param ids_or_emails: profile ids and/or e-mail addresses to look up
    :type ids_or_emails: list[str]
    :param with_publications: if True, the notes returned by both API clients for ``authorids == profile.id`` are stored under ``profile.content['publications']``
    :type with_publications: bool, optional
    :param with_relations: if True, each relation whose username or email resolves to a profile gets a ``profile_id`` key
    :type with_relations: bool, optional
    :param with_preferred_emails: invitation id to get the edges where the preferred emails are stored
    :type with_preferred_emails: str
    :param as_dict: if True, return a dict keyed by every requested id/email (value is None for an id that was not found)
    :type as_dict: bool, optional

    :return: list of Profiles, or a dict of Profiles when ``as_dict`` is True
    :rtype: list[Profile] or dict
    '''
    # Split the input into profile ids and e-mail addresses
    ids = []
    emails = []
    for member in ids_or_emails:
        if '~' in member:
            ids.append(member)
        else:
            emails.append(member)

    # profile_by_id holds one entry per profile; profile_by_id_or_email also
    # indexes each profile by all of its usernames and by the queried e-mail.
    profile_by_id = {}
    profile_by_id_or_email = {}

    def process_profile(profile, email=None):
        profile_by_id[profile.id] = profile

        for name in profile.content.get("names", []):
            if name.get("username"):
                profile_by_id_or_email[name.get("username")] = profile

        if email:
            profile_by_id_or_email[email] = profile

    batch_size = 1000
    ## Get profiles by id and add them to the profiles list
    for i in range(0, len(ids), batch_size):
        batch_ids = ids[i:i+batch_size]
        batch_profiles = client.search_profiles(ids=batch_ids)
        for profile in batch_profiles:
            process_profile(profile)

    ## Get profiles by email and add them to the profiles list
    for j in range(0, len(emails), batch_size):
        batch_emails = emails[j:j+batch_size]
        # With confirmedEmails the search result is consumed as an email -> profile mapping
        batch_profile_by_email = client.search_profiles(confirmedEmails=batch_emails)
        for email, profile in batch_profile_by_email.items():
            process_profile(profile, email)

    # Create a placeholder profile for every e-mail that matched nothing
    for email in emails:
        if email not in profile_by_id_or_email:
            profile = openreview.Profile(
                id=email,
                content={
                    'emails': [email],
                    'preferredEmail': email,
                    'emailsConfirmed': [email],
                    'names': []
                })
            profile_by_id[profile.id] = profile
            profile_by_id_or_email[email] = profile

    ## Get publications for all the profiles
    profiles = list(profile_by_id.values())
    if with_publications:
        baseurl_v1, baseurl_v2 = get_base_urls(client)
        client_v1 = openreview.Client(baseurl=baseurl_v1, token=client.token)
        client_v2 = openreview.api.OpenReviewClient(baseurl=baseurl_v2, token=client.token)

        # Fetch publications from both APIs in parallel per profile
        from concurrent.futures import ThreadPoolExecutor

        def get_publications(profile):
            # One worker per API; the two note lists are concatenated, v1 first
            with ThreadPoolExecutor(max_workers=2) as executor:
                future_v1 = executor.submit(client_v1.get_all_notes, content={'authorids': profile.id})
                future_v2 = executor.submit(client_v2.get_all_notes, content={'authorids': profile.id})
                pubs_v1 = future_v1.result()
                pubs_v2 = future_v2.result()
            return pubs_v1 + pubs_v2

        publications_all = concurrent_requests(get_publications, profiles, desc='Loading publications from both APIs')
        # Results come back in the same order as `profiles`
        for idx, publications in enumerate(publications_all):
            profiles[idx].content['publications'] = publications

    if with_relations:
        # Collect every username/email referenced by a relation, then resolve
        # them all with a single recursive lookup.
        relation_profile_ids = set()
        for profile in profiles:
            relation_usernames = [relation.get('username') for relation in profile.content.get('relations', []) if relation.get('username')]
            relation_emails = [relation.get('email') for relation in profile.content.get('relations', []) if relation.get('email')]
            relation_profile_ids.update(relation_usernames)
            relation_profile_ids.update(relation_emails)

        relation_profiles_by_id = get_profiles(client, list(relation_profile_ids), as_dict=True)

        for profile in profiles:
            for relation in profile.content.get('relations', []):
                # The username takes precedence over the email
                relation_profile = relation_profiles_by_id.get(relation.get('username')) or relation_profiles_by_id.get(relation.get('email'))
                if relation_profile:
                    relation['profile_id'] = relation_profile.id

    if with_preferred_emails is not None:
        # Map each edge head to the tail of the first edge in its group
        preferred_email_by_id = { g['id']['head']: g['values'][0]['tail'] for g in client.get_grouped_edges(invitation=with_preferred_emails, groupby='head', select='tail')}
        for profile in profiles:
            preferred_email = preferred_email_by_id.get(profile.id)
            if preferred_email:
                profile.content['preferredEmail'] = preferred_email

    if as_dict:
        # Key the result by the values the caller asked for
        profiles_as_dict = {}
        for id in ids:
            profiles_as_dict[id] = profile_by_id_or_email.get(id)

        for email in emails:
            profiles_as_dict[email] = profile_by_id_or_email.get(email)

        return profiles_as_dict

    return profiles
def get_group(client, id):
    """
    Get a single Group by id if available

    :param client: User that will retrieve the group
    :type client: Client
    :param id: id of the group
    :type id: str

    :return: Group that matches the passed id, or None when the error reports that the group was not found
    :rtype: Group

    :raises OpenReviewException: when retrieval fails for any reason other than "not found"
    """
    try:
        return client.get_group(id = id)
    except openreview.OpenReviewException as e:
        # throw an error if it is something other than "not found"
        error = e.args[0] if e.args else None
        # Only a dict payload carries 'name'/'message'. Any other payload
        # (e.g. a plain string) is re-raised untouched instead of being
        # masked by an AttributeError from `.get`.
        if isinstance(error, dict):
            message = error.get('message')
            if error.get('name') == 'NotFoundError':
                return None
            if isinstance(message, str) and message.startswith('Group Not Found'):
                return None
        raise
def get_note(client, id):
    """
    Get a single Note by id if available

    :param client: User that will retrieve the note
    :type client: Client
    :param id: id of the note
    :type id: str

    :return: Note that matches the passed id, or None when the error reports that the note was not found
    :rtype: Note

    :raises OpenReviewException: when retrieval fails for any reason other than "not found"
    """
    try:
        return client.get_note(id = id)
    except openreview.OpenReviewException as e:
        # throw an error if it is something other than "not found"
        error = e.args[0] if e.args else None
        # Only a dict payload carries 'name'/'message'. Any other payload
        # (e.g. a plain string) is re-raised untouched instead of being
        # masked by an AttributeError from `.get`.
        if isinstance(error, dict):
            message = error.get('message')
            if error.get('name') == 'NotFoundError':
                return None
            if isinstance(message, str) and message.startswith('Note Not Found'):
                return None
        raise
def get_invitation(client, id):
    """
    Get a single Invitation by id if available

    Any OpenReviewException raised by the client is printed and swallowed.

    :param client: User that will retrieve the invitation
    :type client: Client
    :param id: id of the invitation
    :type id: str

    :return: Invitation that matches the passed id, or None if the client raised an OpenReviewException
    :rtype: Invitation
    """
    try:
        return client.get_invitation(id = id)
    except openreview.OpenReviewException as error:
        print('Can not retrieve invitation', error)
        return None
def create_profile(client, email, fullname, super_user='openreview.net'):
    """
    Given an email and a full name, creates a new profile.

    A tilde username is requested for ``fullname``; a username group and an
    email group are posted, each listing the other as its only member; then the
    profile itself is posted.

    :param client: User that will create the Profile
    :type client: Client
    :param email: Preferred e-mail in the Profile
    :type email: str
    :param fullname: Full name of the user
    :type fullname: str
    :param super_user: Super user of the system
    :type super_user: str

    :return: The created Profile
    :rtype: Profile

    :raises OpenReviewException: when a profile with this email address already exists
    """
    if get_profile(client, email):
        raise openreview.OpenReviewException('There is already a profile with this email address: {}'.format(email))

    tilde_id = client.get_tildeusername(fullname)['username']

    def build_group(group_id, member):
        # The group reads and signs as itself but is written by the creating user
        return openreview.api.Group(
            id=group_id,
            signatures=[client.profile.id],
            signatories=[group_id],
            readers=[group_id],
            writers=[client.profile.id],
            members=[member]
        )

    # Both groups are built before anything is posted
    group_edits = (
        (f'{super_user}/-/Username', build_group(tilde_id, email)),
        (f'{super_user}/-/Email', build_group(email, tilde_id)),
    )

    profile_content = {
        'emails': [email],
        'preferredEmail': email,
        'names': [
            {
                'fullname': fullname,
                'username': tilde_id
            }
        ],
    }

    for invitation_id, new_group in group_edits:
        client.post_group_edit(
            invitation_id,
            signatures=[super_user],
            readers=[tilde_id],
            writers=[super_user],
            group=new_group
        )

    return client.post_profile(openreview.Profile(id=tilde_id, content=profile_content, signatures=[tilde_id]))
def create_authorid_profiles(client, note):
    """
    Creates a profile for every author of ``note`` that is identified by an e-mail address

    Author ids that do not contain ``@`` are skipped. A failure to create one
    profile is printed and does not stop the remaining authors.

    :param client: Client used to create the profiles
    :type client: Client
    :param note: submission whose ``authors`` and ``authorids`` content values are read
    :type note: Note

    :return: the profiles that were created (empty when either field is missing or their lengths differ)
    :rtype: list[Profile]
    """
    created_profiles = []

    content = note.content
    if 'authors' not in content or 'authorids' not in content:
        return created_profiles

    author_names = [name.replace('*', '') for name in content['authors']['value']]
    author_emails = list(content['authorids']['value'])

    if len(author_names) != len(author_emails):
        print('{}: length mismatch. authors ({}), authorids ({})'.format(
            note.id,
            len(author_names),
            len(author_emails)
            ))
        return created_profiles

    # iterate through authorids and authors at the same time
    for raw_id, raw_name in zip(author_emails, author_names):
        author_id = raw_id.strip()
        author_name = raw_name.strip()

        if '@' not in author_id:
            continue

        try:
            profile = create_profile(client=client, email=author_id, fullname=author_name)
            created_profiles.append(profile)
            print('{}: profile created with id {}'.format(note.id, profile.id))
        except openreview.OpenReviewException as e:
            print('Error while creating profile for note id {note_id}, author {author_id}, '.format(note_id=note.id, author_id=author_id), e)

    return created_profiles
def get_preferred_name(profile, last_name_only=False):
    """
    Accepts openreview.Profile object

    :param profile: Profile from which the preferred name will be retrieved
    :type profile: Profile
    :param last_name_only: if True, only the last space-separated token of the full name is returned
    :type last_name_only: bool, optional

    :return: User's preferred name, if available, or the first listed name if not available.
    :rtype: str
    """
    names = profile.content['names']
    flagged_names = [entry for entry in names if entry.get('preferred', False)]

    # First name flagged as preferred, otherwise the first name on the profile
    fullname = (flagged_names or names)[0]['fullname']

    return fullname.split(' ')[-1] if last_name_only else fullname
def generate_bibtex(note, venue_fullname, year, url_forum=None, paper_status='under review', anonymous=True, names_reversed=False, baseurl='https://openreview.net', editor=None):
    """
    Generates a bibtex field for a given Note.

    :param note: Note from which the bibtex is generated
    :type note: Note
    :param venue_fullname: Full name of the venue to be placed in the book title field
    :type venue_fullname: str
    :param year: Note year
    :type year: str
    :param url_forum: Forum id, if none is provided, it is obtained from the note parameter: note.forum
    :type url_forum: str, optional
    :param paper_status: Used to indicate the status of a paper: ["accepted", "rejected" or "under review"]
    :type paper_status: string, optional
    :param anonymous: Used to indicate whether or not the paper's authors should be revealed
    :type anonymous: bool, optional
    :param names_reversed: If true, it indicates that the last name is written before the first name
    :type names_reversed: bool, optional
    :param baseurl: Base url where the bibtex is from. Default https://openreview.net
    :type baseurl: str, optional
    :param editor: Editor(s) of the venue; added as an ``editor`` field for accepted papers only
    :type editor: str, optional

    :return: Note bibtex, or None when ``paper_status`` is not one of the three supported values
    :rtype: str
    """
    # Title and authors may be plain values or {'value': ...} wrappers
    raw_title = note.content['title']
    note_title = raw_title if isinstance(raw_title, str) else raw_title['value']

    first_word = re.sub('[^a-zA-Z]', '', note_title.split(' ')[0].lower())
    forum = url_forum if url_forum else note.forum

    if anonymous:
        first_author_last_name = 'anonymous'
        authors = 'Anonymous'
    else:
        raw_authors = note.content['authors']
        author_names = raw_authors if isinstance(raw_authors, list) else raw_authors['value']
        first_author_last_name = author_names[0].split(' ')[-1].lower()
        if names_reversed:
            # last, first
            reversed_names = []
            for author_name in author_names:
                name_parts = author_name.split(' ')
                reversed_names.append(name_parts[-1] + ', ' + ' '.join(name_parts[:-1]))
            authors = ' and '.join(reversed_names)
        else:
            authors = ' and '.join(author_names)

    # Wrap runs of two or more capitals in braces, then apply the default rules
    encoder = UnicodeToLatexEncoder(
        conversion_rules=[
            UnicodeToLatexConversionRule(
                rule_type=RULE_REGEX,
                rule=[
                    (re.compile(r'[A-Z]{2,}'), r'{\g<0>}')
                ]),
            'defaults'
        ]
    )
    bibtex_title = encoder.unicode_to_latex(note_title)

    # The key is reduced to plain ASCII
    raw_key = first_author_last_name + year + first_word + ','
    bibtex_key = unicodedata.normalize('NFKD', raw_key).encode("ascii", "ignore").decode("ascii")

    def field(name, value, last=False):
        # One "name={value}" entry; every entry except the last ends with a comma
        return name + '={' + value + ('}' if last else '},')

    if paper_status == 'under review':
        lines = [
            '@inproceedings{',
            bibtex_key,
            field('title', bibtex_title),
            field('author', utf8tolatex(authors)),
            field('booktitle', 'Submitted to ' + utf8tolatex(venue_fullname)),
            field('year', year),
            field('url', baseurl + '/forum?id=' + forum),
            'note={under review}',
            '}'
        ]
        return '\n'.join(lines)

    if paper_status == 'accepted':
        lines = [
            '@inproceedings{',
            bibtex_key,
            field('title', bibtex_title),
            field('author', utf8tolatex(authors)),
            field('booktitle', utf8tolatex(venue_fullname))
        ]
        if editor:
            lines.append(field('editor', utf8tolatex(editor)))
        lines.extend([
            field('year', year),
            field('url', baseurl + '/forum?id=' + forum, last=True),
            '}'
        ])
        return '\n'.join(lines)

    if paper_status == 'rejected':
        lines = [
            '@misc{',
            bibtex_key,
            field('title', bibtex_title),
            field('author', utf8tolatex(authors)),
            field('year', year),
            field('url', baseurl + '/forum?id=' + forum, last=True),
            '}'
        ]
        return '\n'.join(lines)

    return None
@run_once
def load_duplicate_domains():
    """
    Loads the ``duplicate_domains.json`` file located in the same directory as this module

    The function is wrapped with ``run_once``, so the parsed content is
    computed on the first call and reused afterwards.

    :return: parsed content of ``duplicate_domains.json``
    :rtype: dict
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # Explicit UTF-8 avoids depending on the platform's default encoding.
    # The `with` block closes the file, so no explicit close() is needed.
    with open(os.path.join(dir_path, 'duplicate_domains.json'), encoding='utf-8') as f:
        return json.load(f)
def subdomains(domain):
    """
    Given an email address, returns a list with the domains and subdomains.

    Everything up to and including the last ``@`` is discarded, so both a full
    e-mail address and a bare domain are accepted. Top-level domains are left
    out, each remaining domain is replaced by its entry in the duplicate-domains
    mapping when it has one, and the result is sorted.

    :param domain: e-mail address or domain of the e-mail address
    :type domain: str

    :return: Sorted list of domains and subdomains
    :rtype: list[str]

    Example (assuming none of these domains is remapped by the duplicate-domains file):

    >>> subdomains('johnsmith@iesl.cs.umass.edu')
    ['cs.umass.edu', 'iesl.cs.umass.edu', 'umass.edu']
    """
    duplicate_domains: dict = load_duplicate_domains()

    # Drop the local part; without this 'johnsmith@iesl' would be treated as a label
    host = domain.rpartition('@')[2]
    domain_components = [c for c in host.split('.') if c and not c.isspace()]

    # Every suffix of the label list: a.b.c -> a.b.c, b.c, c
    domains = ['.'.join(domain_components[index:]) for index in range(len(domain_components))]

    valid_domains = set()
    for d in domains:
        if not tld.is_tld(d):
            valid_domains.add(duplicate_domains.get(d, d))

    return sorted(valid_domains)
def get_paperhash(first_author, title):
    """
    Returns the paperhash of a paper, given the title and first author.

    The hash is ``<last token of the author name>|<title>``, lowercased, with
    punctuation removed and whitespace runs in the title replaced by ``_``.

    :param first_author: First author that appears on the paper
    :type first_author: str
    :param title: Title of the paper
    :type title: str

    :return: paperhash, see example
    :rtype: str

    Example:

    >>> get_paperhash('David Soergel', 'Open Scholarship and Peer Review: a Time for Experimentation')
    u'soergel|open_scholarship_and_peer_review_a_time_for_experimentation'
    """
    strip_punctuation = r'[^A-zÀ-ÿ\d\s]'

    title_part = re.sub(strip_punctuation, '', title.strip())
    # Line breaks are deleted outright; remaining whitespace runs become '_'
    title_part = re.sub('\r|\n', '', title_part)
    title_part = re.sub(r'\s+', '_', title_part)

    # Only the last space-separated token of the author name is kept
    author_part = re.sub(strip_punctuation, '', first_author).split(' ')[-1]

    return (author_part + '|' + title_part).lower()
def replace_members_with_ids(client, group):
    """
    Given a Group object, iterates through the Group's members and, for any member represented by an email address, attempts to find a profile associated with that email address. If a profile is found, replaces the email with the profile id.

    The group is saved with ``client.post_group`` when the client has that
    method; otherwise, when it has ``post_group_edit``, an edit with the
    de-duplicated members is posted and the group is fetched again.

    :param client: Client used to get the Profiles and to post the new Group
    :type client: Client
    :param group: Group for which the profiles will be updated
    :type group: Group

    :return: Group with the emails replaced by Profile ids
    :rtype: Group

    :raises OpenReviewException: when a member starting with ``~`` has no profile
    """
    member_profiles = get_profiles(client, group.members, as_dict=True)

    updated_members = []
    without_profile_ids = []
    for member in group.members:
        profile = member_profiles.get(member)
        if profile is not None:
            updated_members.append(profile.id)
            continue
        # A tilde id without a profile is an error; anything else is kept as is
        if member.startswith('~'):
            without_profile_ids.append(member)
        else:
            updated_members.append(member)

    if without_profile_ids:
        raise openreview.OpenReviewException(f"Profile Not Found for {without_profile_ids}")

    group.members = updated_members

    if getattr(client, 'post_group', None):
        return client.post_group(group)

    if getattr(client, 'post_group_edit', None):
        domain = group.domain
        client.post_group_edit(
            invitation = domain + '/-/Edit',
            readers = [domain],
            writers = [domain],
            signatures = [domain],
            group = openreview.api.Group(
                id = group.id,
                members = list(set(group.members))
            ),
            flush_members_cache=False
        )
        return client.get_group(group.id)

    return None
def concurrent_get(client, get_function, **params):
    """
    Calls a paginated getter as many times as needed and returns all the results in a single list.

    When the requested ``limit`` fits in one request (``limit <= client.limit``)
    the getter is called once. Otherwise the total count is requested first, the
    first page is fetched, and the remaining pages are fetched concurrently, one
    request per ``client.limit`` offset step.

    :param client: Client used to make requests; its ``limit`` attribute is the page size
    :type client: Client
    :param get_function: Function that performs the request; it must accept ``offset``, ``limit`` and ``with_count`` and return ``(results, count)`` when ``with_count`` is True
    :type get_function: function
    :param params: Parameters to pass to the get_function
    :type params: dict

    :return: List of results
    :rtype: list
    """
    max_workers = min(16, (cpu_count() or 1) * 5)

    if (params.get('limit') or float('inf')) <= client.limit:
        return get_function(**params)

    # Ask for a single item from the start just to learn the total count
    count_params = params.copy()
    if count_params.get('offset') is not None:
        count_params.pop('offset')
    count_params['with_count'] = True
    count_params['limit'] = 1
    _, total = get_function(**count_params)

    params['with_count'] = False
    limit = params.get('limit')
    if (limit or client.limit) > client.limit:
        # A limit above the page size is not sent along with the first request
        params.pop('limit')
    docs = get_function(**params)

    first_offset = params.get('offset') or 0
    if (total - first_offset) <= client.limit:
        return docs

    start = first_offset + client.limit
    end = total if limit is None else min(first_offset + limit, total)
    page_offsets = list(range(start, end, client.limit))

    futures = []
    # The context manager closes the bar even when a request raises
    with tqdm(total=len(page_offsets), desc='Gathering Responses') as progress:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            for page_index, page_offset in enumerate(page_offsets):
                params['offset'] = page_offset
                # The last page is trimmed so that no more than `end` items are read
                if (page_index + 1) == len(page_offsets) and (end - page_offset) > 0:
                    params['limit'] = end - page_offset
                # **params is unpacked here, so each request gets its own snapshot
                futures.append(executor.submit(get_function, **params))

            for future in futures:
                docs.extend(future.result())
                progress.update(1)

    return docs
class iterget:
    """
    This class can create an iterator from a getter method that returns a list. Below all the iterators that can be created from a getter method:

    :meth:`openreview.Client.get_tags` --> :func:`tools.iterget_tags`

    :meth:`openreview.Client.get_notes` --> :func:`tools.iterget_notes`

    :meth:`openreview.Client.get_references` --> :func:`tools.iterget_references`

    :meth:`openreview.Client.get_invitations` --> :func:`tools.iterget_invitations`

    :meth:`openreview.Client.get_groups` --> :func:`tools.iterget_groups`

    Results are requested in batches of ``limit`` items (1000 when no limit is
    given), advancing ``offset`` by ``limit`` until an empty batch is returned.
    The first batch is requested as soon as the iterator is created.

    :param get_function: Any of the aforementioned methods
    :type get_function: function
    :param params: Dictionary containing parameters for the corresponding method. Refer to the passed method documentation for details
    :type params: dict
    """

    def __init__(self, get_function, **params):
        self.offset = 0
        self.last_batch = False
        self.batch_finished = False
        self.obj_index = 0
        self.get_function = get_function

        params['offset'] = self.offset
        params['limit'] = params.get('limit') or 1000
        self.params = params

        self.current_batch = self.get_function(**self.params)

    def update_batch(self):
        """Requests the next batch; an empty or missing result becomes ``[]``."""
        self.offset += self.params['limit']
        self.params['offset'] = self.offset
        self.current_batch = self.get_function(**self.params) or []

    def __iter__(self):
        return self

    def __next__(self):
        batch = self.current_batch
        if len(batch) == 0:
            raise StopIteration

        item = batch[self.obj_index]
        if self.obj_index + 1 == len(batch):
            # Last item of this batch: load the following one before returning
            self.update_batch()
            self.obj_index = 0
        else:
            self.obj_index += 1
        return item

    next = __next__
class efficient_iterget:
    """
    This class can create an iterator from a getter method that returns a list. Below all the iterators that can be created from a getter method:

    :meth:`openreview.Client.get_tags` --> :func:`tools.iterget_tags`

    :meth:`openreview.Client.get_notes` --> :func:`tools.iterget_notes`

    :meth:`openreview.Client.get_references` --> :func:`tools.iterget_references`

    :meth:`openreview.Client.get_invitations` --> :func:`tools.iterget_invitations`

    :meth:`openreview.Client.get_groups` --> :func:`tools.iterget_groups`

    Instead of an offset, each new batch is requested with ``after`` set to the
    ``id`` of the last object of the previous batch. The first request asks for
    the total count (``with_count=True``) and must return ``(batch, total)``;
    a progress bar is shown only when that total is larger than one batch.

    :param get_function: Any of the aforementioned methods
    :type get_function: function
    :param desc: description to show in the progress bar
    :type desc: str
    :param params: Dictionary containing parameters for the corresponding method. Refer to the passed method documentation for details
    :type params: dict
    """

    def __init__(self, get_function, desc='Gathering Responses', **params):
        self.obj_index = 0

        self.params = params
        self.params.update({
            'with_count': True,
            'sort': params.get('sort') or 'id',
            'limit': params.get('limit') or 1000
        })

        self.get_function = get_function
        self.current_batch, total = self.get_function(**self.params)

        # No progress bar when everything fits in the first batch
        self.gathering_responses = tqdm(total=total, desc=desc) if total > self.params['limit'] else None

    def update_batch(self):
        """Requests the batch that follows the last object of the current one."""
        self.params['after'] = self.current_batch[-1].id
        # The count is only needed once, on the first request
        self.params['with_count'] = False
        self.current_batch = self.get_function(**self.params) or []

    def __iter__(self):
        return self

    def __next__(self):
        if len(self.current_batch) == 0:
            if self.gathering_responses is not None:
                self.gathering_responses.close()
            raise StopIteration

        next_obj = self.current_batch[self.obj_index]

        # Tick for EVERY returned object, including the last one of a batch;
        # otherwise the bar ends one short per batch and never reaches the total.
        if self.gathering_responses is not None:
            self.gathering_responses.update(1)

        if (self.obj_index + 1) == len(self.current_batch):
            self.update_batch()
            self.obj_index = 0
        else:
            self.obj_index += 1

        return next_obj

    next = __next__
def iterget_messages(client, to = None, subject = None, status = None):
    """
    Returns an iterator over Messages ignoring API limit.

    Example:

    >>> iterget_messages(client, to='melisa@mail.com')

    All three filters are always forwarded to ``client.get_messages``, including the ones left as None.

    :param client: Client used to get the Messages
    :type client: Client
    :param to: forwarded as the ``to`` filter
    :type to: str, optional
    :param subject: forwarded as the ``subject`` filter
    :type subject: str, optional
    :param status: forwarded as the ``status`` filter
    :type status: str, optional

    :return: Iterator over Messages filtered by the provided parameters
    :rtype: iterget
    """
    return iterget(client.get_messages, to=to, subject=subject, status=status)
def iterget_tags(client, id = None, invitation = None, forum = None, signature = None, tag = None):
    """
    Returns an iterator over Tags ignoring API limit.

    Example:

    >>> iterget_tags(client, invitation='MyConference.org/-/Bid_Tags')

    Only the filters that are not None are forwarded to ``client.get_tags``.

    :param client: Client used to get the Tags
    :type client: Client
    :param id: a Tag ID. If provided, returns Tags whose ID matches the given ID.
    :type id: str, optional
    :param invitation: an Invitation ID. If provided, returns Tags whose "invitation" field is this Invitation ID.
    :type invitation: str, optional
    :param forum: a Note ID. If provided, returns Tags whose forum matches the given ID.
    :type forum: str, optional
    :param signature: forwarded as the ``signature`` filter when provided
    :type signature: str, optional
    :param tag: forwarded as the ``tag`` filter when provided
    :type tag: str, optional

    :return: Iterator over Tags filtered by the provided parameters
    :rtype: iterget
    """
    candidate_filters = (
        ('id', id),
        ('forum', forum),
        ('invitation', invitation),
        ('signature', signature),
        ('tag', tag),
    )
    params = {key: value for key, value in candidate_filters if value is not None}

    return iterget(client.get_tags, **params)
def iterget_edges(client, invitation = None, head = None, tail = None, label = None, limit = None, trash = None):
    """Return an iterator over Edges, bypassing API pagination limits.

    Fetches all matching edges across multiple API pages transparently.
    Use this instead of ``client.get_edges()`` when the result set may
    exceed the single-request limit.

    :param client: Client used to get the Edges.
    :type client: Client
    :param invitation: An Invitation ID. If provided, returns Edges whose invitation field matches.
    :type invitation: str, optional
    :param head: A head entity ID. If provided, returns Edges whose head field matches.
    :type head: str, optional
    :param tail: A tail entity ID. If provided, returns Edges whose tail field matches.
    :type tail: str, optional
    :param label: If provided, returns Edges whose label field matches.
    :type label: str, optional
    :param limit: Number of Edges requested per API call (``iterget`` uses 1000 when omitted). The iterator keeps requesting pages until one comes back empty, so this does not cap the total number of Edges returned.
    :type limit: int, optional
    :param trash: If True, includes Edges that have been deleted.
    :type trash: bool, optional

    :return: Iterator over Edge objects matching the provided filters.
    :rtype: iterget
    """
    candidate_filters = (
        ('invitation', invitation),
        ('head', head),
        ('tail', tail),
        ('label', label),
        ('limit', limit),
    )
    params = {key: value for key, value in candidate_filters if value is not None}

    # Only an explicit True turns the flag on
    if trash == True:
        params['trash'] = True

    return iterget(client.get_edges, **params)
def iterget_grouped_edges(
        client,
        invitation=None,
        groupby='head',
        select='id,tail,label,weight',
        logger=None
    ):
    '''
    Helper function for retrieving and parsing all edges in bulk

    This is a generator: for every group returned by ``client.get_grouped_edges``
    it yields the list of Edges built from that group's values.

    :param client: Client used to get the grouped edges
    :type client: Client
    :param invitation: Invitation ID of the edges; also set on every Edge that is built
    :type invitation: str, optional
    :param groupby: field used to group the edges
    :type groupby: str, optional
    :param select: comma-separated edge fields to retrieve for each group
    :type select: str, optional
    :param logger: not used by this function
    :type logger: optional

    :return: generator yielding one list of Edges per group
    '''
    def build_edge(group_id, values):
        # Empty permission lists by default; the selected values and then the
        # group key (e.g. the head) are layered on top.
        edge_params = dict(readers=[], writers=[], signatures=[], invitation=invitation)
        edge_params.update(values)
        edge_params.update(group_id)
        return openreview.Edge(**edge_params)

    ## Backend has pagination temporally disabled, it returns all the groups now so we need to do one iteration.
    for group in client.get_grouped_edges(invitation=invitation, groupby=groupby, select=select):
        yield [build_edge(group['id'], values) for values in group['values']]
@deprecated(version='1.52.6', reason="Use client.get_all_notes() instead")
def iterget_notes(client, id = None, paperhash = None, forum = None, invitation = None, replyto = None, tauthor = None, signature = None, writer = None, trash = None, number = None, mintcdate = None, content = None, details = None, sort = None):
    """
    Returns an iterator over Notes filtered by the provided parameters ignoring API limit.

    Only the filters that are not None are forwarded to ``client.get_notes``;
    ``sort`` is always forwarded.

    :param client: Client used to get the Notes
    :type client: Client
    :param id: a Note ID. If provided, returns Notes whose ID matches the given ID.
    :type id: str, optional
    :param paperhash: a "paperhash" for a note. If provided, returns Notes whose paperhash matches this argument.
        (A paperhash is a human-interpretable string built from the Note's title and list of authors to uniquely
        identify the Note)
    :type paperhash: str, optional
    :param forum: a Note ID. If provided, returns Notes whose forum matches the given ID.
    :type forum: str, optional
    :param invitation: an Invitation ID. If provided, returns Notes whose "invitation" field is this Invitation ID.
    :type invitation: str, optional
    :param replyto: a Note ID. If provided, returns Notes whose replyto field matches the given ID.
    :type replyto: str, optional
    :param tauthor: a Group ID. If provided, returns Notes whose tauthor field ("true author") matches the given ID, or is a transitive member of the Group represented by the given ID.
    :type tauthor: str, optional
    :param signature: a Group ID. If provided, returns Notes whose signatures field contains the given Group ID.
    :type signature: str, optional
    :param writer: a Group ID. If provided, returns Notes whose writers field contains the given Group ID.
    :type writer: str, optional
    :param trash: If True, includes Notes that have been deleted (i.e. the ddate field is less than the
        current date)
    :type trash: bool, optional
    :param number: If present, includes Notes whose number field equals the given integer.
    :type number: int, optional
    :param mintcdate: Represents an Epoch time timestamp in milliseconds. If provided, returns Notes
        whose "true creation date" (tcdate) is at least equal to the value of mintcdate.
    :type mintcdate: int, optional
    :param content: If present, includes Notes whose each key is present in the content field and it is equals the given value.
    :type content: dict, optional
    :param details: forwarded unchanged as the ``details`` parameter when provided
    :type details: str, optional
    :param sort: forwarded as the ``sort`` parameter; ``efficient_iterget`` replaces an empty value with ``'id'``
    :type sort: str, optional

    :return: Iterator over Notes filtered by the provided parameters
    :rtype: efficient_iterget
    """
    leading_filters = (
        ('id', id),
        ('paperhash', paperhash),
        ('forum', forum),
        ('invitation', invitation),
        ('replyto', replyto),
        ('tauthor', tauthor),
        ('signature', signature),
        ('writer', writer),
    )
    trailing_filters = (
        ('number', number),
        ('mintcdate', mintcdate),
        ('content', content),
        ('details', details),
    )

    params = {key: value for key, value in leading_filters if value is not None}
    # Only an explicit True turns the flag on
    if trash == True:
        params['trash'] = True
    params.update((key, value) for key, value in trailing_filters if value is not None)
    params['sort'] = sort

    return efficient_iterget(client.get_notes, desc='Getting Notes', **params)
@deprecated(version='1.52.6', reason="Use client.get_all_references() instead")
def iterget_references(client, referent = None, invitation = None, mintcdate = None):
    """
    Returns an iterator over references filtered by the provided parameters ignoring API limit.

    :param client: Client used to get the references
    :type client: Client
    :param referent: a Note ID. If provided, returns references whose "referent" value is this Note ID.
    :type referent: str, optional
    :param invitation: an Invitation ID. If provided, returns references whose "invitation" field is this Invitation ID.
    :type invitation: str, optional
    :param mintcdate: Represents an Epoch time timestamp in milliseconds. If provided, returns references
        whose "true creation date" (tcdate) is at least equal to the value of mintcdate.
    :type mintcdate: int, optional

    :return: Iterator over references filtered by the provided parameters
    :rtype: iterget
    """
    candidate_filters = (
        ('referent', referent),
        ('invitation', invitation),
        ('mintcdate', mintcdate),
    )
    # Only the filters that were actually supplied are forwarded.
    params = {key: value for key, value in candidate_filters if value is not None}

    return iterget(client.get_references, **params)
@deprecated(version='1.52.6', reason="Use client.get_all_invitations() instead")
def iterget_invitations(client, id=None, ids=None, invitee=None, regex=None, tags=None, minduedate=None, duedate=None, pastdue=None, replytoNote=None, replyForum=None, signature=None, note=None, replyto=None, details=None, expired=None, super=None, sort=None):
    """
    Returns an iterator over invitations, filtered by the provided parameters, ignoring API limit.

    :param client: Client used to get the Invitations
    :type client: Client
    :param id: an Invitation ID. If provided, returns invitations whose "id" value is this Invitation ID.
    :type id: str, optional
    :param ids: Comma separated Invitation IDs. If provided, returns invitations whose "id" value is any of the passed Invitation IDs.
    :type ids: str, optional
    :param invitee: Essentially, invitees field in an Invitation object contains Group Ids being invited using the invitation.
        If provided, returns invitations whose "invitee" field contains the given string.
    :type invitee: str, optional
    :param regex: a regular expression string to match Invitation IDs. If provided, returns invitations whose "id" value matches the given regex.
    :type regex: str, optional
    :param tags: If provided, returns Invitations whose Tags field contains the given Tag IDs.
    :type tags: list[str], optional
    :param minduedate: Represents an Epoch time timestamp in milliseconds. If provided, returns Invitations whose duedate is at least equal to the value of minduedate.
    :type minduedate: int, optional
    :param duedate: Represents an Epoch time timestamp in milliseconds. If provided, returns Invitations whose duedate field matches the given duedate.
    :type duedate: int, optional
    :param pastdue: forwarded as the ``pastdue`` filter when not None
    :type pastdue: bool, optional
    :param replytoNote: a Note ID. If provided, returns Invitations whose replytoNote field contains the given Note ID.
    :type replytoNote: str, optional
    :param replyForum: a forum ID. If provided, returns Invitations whose forum field contains the given forum ID.
    :type replyForum: str, optional
    :param signature: a Group ID. If provided, returns Invitations whose signature field contains the given Group ID.
    :type signature: str, optional
    :param note: a Note ID. If provided, returns Invitations whose note field contains the given Note ID.
    :type note: str, optional
    :param replyto: a Note ID. If provided, returns Invitations whose replyto field matches the given Note ID.
    :type replyto: str, optional
    :param details: forwarded as the ``details`` option when not None
    :type details: str, optional
    :param expired: get also expired invitations, by default returns 'active' invitations.
    :type expired: bool, optional
    :param super: forwarded as the ``super`` filter when not None
    :type super: str, optional
    :param sort: forwarded as the ``sort`` option when not None
    :type sort: str, optional

    :return: Iterator over Invitations filtered by the provided parameters
    :rtype: iterget
    """
    candidate_filters = (
        ('id', id),
        ('ids', ids),
        ('invitee', invitee),
        ('regex', regex),
        ('tags', tags),
        ('minduedate', minduedate),
        ('duedate', duedate),
        ('pastdue', pastdue),
        ('details', details),
        ('replytoNote', replytoNote),
        ('replyForum', replyForum),
        ('signature', signature),
        ('note', note),
        ('replyto', replyto),
        ('super', super),
        ('expired', expired),
        ('sort', sort),
    )
    # Every filter left at None is simply not sent to the API.
    params = {key: value for key, value in candidate_filters if value is not None}

    return efficient_iterget(client.get_invitations, desc='Getting Invitations', **params)
@deprecated(version='1.52.6', reason="Use client.get_all_groups() instead")
def iterget_groups(client, id = None, regex = None, member = None, host = None, signatory = None, web = None):
    """
    Returns an iterator over groups filtered by the provided parameters ignoring API limit.

    :param client: Client used to get the Groups
    :type client: Client
    :param id: a Group ID. If provided, returns groups whose "id" value is this Group ID.
    :type id: str, optional
    :param regex: a regular expression string to match Group IDs. If provided, returns groups whose "id" value matches the given regex.
    :type regex: str, optional
    :param member: Essentially, members field contains Group Ids that are members of this Group object.
        If provided, returns groups whose "members" field contains the given string.
    :type member: str, optional
    :param host: forwarded as the ``host`` filter when not None
    :type host: str, optional
    :param signatory: a Group ID. If provided, returns Groups whose signatory field contains the given Group ID.
    :type signatory: str, optional
    :param web: Groups that contain a web field value
    :type web: bool, optional

    :return: Iterator over Groups filtered by the provided parameters
    :rtype: iterget
    """
    candidate_filters = (
        ('id', id),
        ('regex', regex),
        ('member', member),
        ('host', host),
        ('signatory', signatory),
        ('web', web),
    )
    # Only the filters that were actually supplied are forwarded.
    params = {key: value for key, value in candidate_filters if value is not None}

    return efficient_iterget(client.get_groups, desc='Getting Groups', **params)
def timestamp_GMT(year, month, day, hour=0, minute=0, second=0):
    """
    Given year, month, day, and (optionally) hour, minute, second in GMT time zone:
    returns the number of milliseconds between this date and Epoch Time (Jan 1, 1970).

    :param year: year >= 1970
    :type year: int
    :param month: value from 1 to 12
    :type month: int
    :param day: value from 1 to 28, 29, 30, or 31; depending on the month value.
    :type day: int
    :param hour: value from 0 to 23
    :type hour: int, optional
    :param minute: value from 0 to 59
    :type minute: int, optional
    :param second: value from 0 to 59
    :type second: int, optional

    :return: Number of milliseconds between the passed date and Epoch Time (Jan 1, 1970)
    :rtype: int

    >>> timestamp_GMT(1990, 12, 20, hour=12, minute=30, second=24)
    661696224000
    """
    # The datetime must be timezone-aware: timestamp() interprets a naive datetime in the
    # machine's local time zone, which would shift the result on any host not running in UTC.
    moment = datetime.datetime(year, month, day, hour, minute, second, tzinfo=datetime.timezone.utc)
    return int(moment.timestamp() * 1000)
def datetime_millis(dt):
    """
    Converts a datetime to milliseconds since Epoch Time (Jan 1, 1970).

    Values that are not ``datetime.datetime`` instances are returned unchanged.
    Naive datetimes are converted with ``datetime.timestamp()``, which interprets them in local time.

    :param dt: The date to convert to milliseconds
    :type dt: datetime

    :return: The time from Jan 1, 1970 to the passed date in milliseconds, or ``dt`` itself when it is not a datetime
    :rtype: int
    """
    if not isinstance(dt, datetime.datetime):
        # Anything else (e.g. an int that is already in milliseconds, or None) passes through.
        return dt

    seconds_since_epoch = dt.timestamp()
    return int(seconds_since_epoch * 1000)
def recruit_reviewer(client, user, first, hash_seed, recruit_reviewers_id, recruit_message, recruit_message_subj, reviewers_invited_id, contact_info='info@openreview.net', verbose=True, replyTo=None, invitation=None, signature=None):
    """
    Recruit a reviewer. Sends an email to the reviewer with a link to accept or reject the recruitment invitation.

    The user is added to the ``reviewers_invited_id`` group before the email is sent.

    :param client: Client used to send the e-mail
    :type client: Client
    :param user: User to whom the e-mail will be sent
    :type user: str
    :param first: Name used to replace ``{{fullname}}`` in the message. When empty, the placeholder is left untouched.
    :type first: str
    :param hash_seed: seed used as the HMAC key for the recruitment link (it is UTF-8 encoded, so it must be a string)
    :type hash_seed: str
    :param recruit_reviewers_id: Invitation ID placed in the ``id`` query parameter of the recruitment link
    :type recruit_reviewers_id: str
    :param recruit_message: message template. The placeholders ``{{fullname}}``, ``{{accept_url}}``,
        ``{{decline_url}}``, ``{{invitation_url}}`` and ``{{contact_info}}`` are replaced.
    :type recruit_message: str
    :param recruit_message_subj: subject line for the recruitment email
    :type recruit_message_subj: str
    :param reviewers_invited_id: group ID for the "Reviewers Invited" group, often used to keep track of which reviewers have already been emailed.
    :type reviewers_invited_id: str
    :param contact_info: The information used to contact support for questions
    :type contact_info: str
    :param verbose: Shows response of :meth:`openreview.Client.post_message` and shows the body of the message sent
    :type verbose: bool, optional
    :param replyTo: forwarded as ``replyTo`` to ``client.post_message``
    :type replyTo: str, optional
    :param invitation: when not None, forwarded together with ``signature`` to ``client.post_message``
    :type invitation: str, optional
    :param signature: forwarded to ``client.post_message`` only when ``invitation`` is given
    :type signature: str, optional
    """
    # HMAC.new() only accepts bytestrings, so both the key and the message are encoded first.
    hashkey = HMAC.new(hash_seed.encode('utf-8'), msg=user.encode('utf-8'), digestmod=SHA256).hexdigest()

    # Always point to the live site so no invitation is ever sent with a localhost link.
    baseurl = 'https://openreview.net'

    # build the URL to send in the message
    url = '{baseurl}/invitation?id={recruitment_inv}&user={user}&key={hashkey}'.format(
        baseurl = baseurl,
        recruitment_inv = recruit_reviewers_id,
        user = urlparse.quote(user),
        hashkey = hashkey
    )

    # Fill in the template with plain replace() calls. No str.format() pass is applied:
    # its result would be unused and it raises on any stray brace left in the message.
    personalized_message = recruit_message.replace("{{fullname}}", first) if first else recruit_message
    personalized_message = personalized_message.replace("{{accept_url}}", url + "&response=Yes")
    personalized_message = personalized_message.replace("{{decline_url}}", url + "&response=No")
    personalized_message = personalized_message.replace("{{invitation_url}}", url)
    personalized_message = personalized_message.replace("{{contact_info}}", contact_info)

    # Record the user as invited first; any OpenReviewException propagates to the caller.
    client.add_members_to_group(reviewers_invited_id, [user])

    # send the email through openreview
    message_options = {}
    if invitation is not None:
        message_options = {'invitation': invitation, 'signature': signature}
    response = client.post_message(recruit_message_subj, [user], personalized_message, parentGroup=reviewers_invited_id, replyTo=replyTo, **message_options)

    if verbose:
        print("Sent to the following: ", response)
        print(personalized_message)
def recruit_user(client, user, hash_seed,
    recruitment_message_subject,
    recruitment_message_content,
    recruitment_invitation_id,
    comittee_invited_id,
    contact_email,
    message_invitation,
    message_signature,
    name=None):
    """Send a recruitment email to a user with a personalized acceptance link.

    Generates an HMAC-based hash key for the user, builds a unique recruitment URL,
    personalizes the message template by replacing ``{{fullname}}``, ``{{invitation_url}}``,
    and ``{{contact_info}}`` placeholders, and sends the email via ``client.post_message()``.

    :param client: Client used to send the recruitment email.
    :type client: Client
    :param user: Email address or profile ID of the user to recruit.
    :type user: str
    :param hash_seed: Secret seed used to generate the HMAC hash key for the recruitment link.
    :type hash_seed: str
    :param recruitment_message_subject: Subject line for the recruitment email.
    :type recruitment_message_subject: str
    :param recruitment_message_content: Message body template. Supports ``{{fullname}}``,
        ``{{invitation_url}}``, and ``{{contact_info}}`` placeholders.
    :type recruitment_message_content: str
    :param recruitment_invitation_id: Invitation ID used in the recruitment URL.
    :type recruitment_invitation_id: str
    :param comittee_invited_id: Group ID for the invited committee group, used as parentGroup
        for the message. (Note: parameter name is a legacy misspelling of "committee".)
    :type comittee_invited_id: str
    :param contact_email: Contact email address substituted into ``{{contact_info}}`` and used
        as the replyTo address.
    :type contact_email: str
    :param message_invitation: Invitation ID for the message invitation.
    :type message_invitation: str
    :param message_signature: Signature used when posting the message.
    :type message_signature: str
    :param name: Full name of the user, used to replace ``{{fullname}}`` in the message.
        When empty, the placeholder is left untouched.
    :type name: str, optional
    """
    # HMAC.new() only accepts bytestrings, so both the key and the message are encoded first.
    hashkey = HMAC.new(hash_seed.encode('utf-8'), msg=user.encode('utf-8'), digestmod=SHA256).hexdigest()

    # The link always targets the live site.
    url = f'https://openreview.net/invitation?id={recruitment_invitation_id}&user={urlparse.quote(user)}&key={hashkey}'

    # Fill in the template with plain replace() calls. No str.format() pass is applied:
    # its result would be unused and it raises on any stray brace left in the message.
    personalized_message = recruitment_message_content.replace("{{fullname}}", name) if name else recruitment_message_content
    personalized_message = personalized_message.replace("{{invitation_url}}", url)
    personalized_message = personalized_message.replace("{{contact_info}}", contact_email)

    client.post_message(recruitment_message_subject, [user], personalized_message, parentGroup=comittee_invited_id, replyTo=contact_email, invitation=message_invitation, signature=message_signature)
def get_user_hash_key(user, hash_seed, invitation=None):
    """Generate a hash key for a user's recruitment or authentication link.

    Without an ``invitation`` the key is the HMAC-SHA256 hex digest of ``user`` keyed by
    ``hash_seed``. With an ``invitation`` the key is a JWT (HS256, signed with ``hash_seed``)
    whose payload carries the user under ``group`` and the invitation under ``invitation``.

    :param user: Email address or group ID of the user.
    :type user: str
    :param hash_seed: Secret seed used for HMAC hashing or JWT signing.
    :type hash_seed: str
    :param invitation: Invitation ID. If provided, a JWT is returned instead of an HMAC hash.
    :type invitation: str, optional

    :return: JWT token string (if invitation is given) or HMAC-SHA256 hex digest.
    :rtype: str
    """
    if invitation is None:
        # HMAC.new() only accepts bytestrings, so key and message are encoded first.
        key_bytes = hash_seed.encode('utf-8')
        user_bytes = user.encode('utf-8')
        return HMAC.new(key_bytes, msg=user_bytes, digestmod=SHA256).hexdigest()

    claims = {
        "group": user,
        "invitation": invitation,
    }
    return jwt.encode(claims, hash_seed, algorithm="HS256")
def get_user_parse(user, quote=True):
    """
    Percent-encodes or decodes a user identifier for use in a URL.

    :param user: the user identifier (or its percent-encoded form when ``quote`` is False)
    :type user: str
    :param quote: when True the value is encoded with ``urllib.parse.quote``, otherwise it is
        decoded with ``urllib.parse.unquote``
    :type quote: bool, optional

    :return: the encoded or decoded string
    :rtype: str
    """
    if quote:
        return urlparse.quote(user)
    return urlparse.unquote(user)


def create_hash_seed():
    """
    Creates a random 16-character seed made of ASCII letters and digits.

    The characters are drawn with the ``secrets`` module rather than ``random``, because the
    seed serves as a signing secret and ``random`` is not meant for security purposes.

    :return: a new random seed
    :rtype: str
    """
    # Imported here so this function does not depend on the module-level import list.
    import secrets

    characters = string.ascii_letters + string.digits  # Includes uppercase, lowercase letters, and digits
    return ''.join(secrets.choice(characters) for _ in range(16))
def get_all_venues(client):
    """
    Returns a list of all the venues, read from the members of the "host" group

    :param client: Client used to get all the venues
    :type client: Client

    :return: List of all the venues, each represented by its corresponding Group id
    :rtype: list[str]
    """
    host_group = client.get_group("host")
    return host_group.members
def info_function_builder(policy_function):
    """
    Wraps a conflict policy function so that the domains it returns are expanded and filtered.

    The returned function calls ``policy_function`` and then replaces ``result['domains']`` with the
    list of all subdomains (as given by ``openreview.tools.subdomains``) of the returned domains,
    minus a fixed list of common public email providers.

    :param policy_function: function taking ``(profile, n_years)`` or ``(profile, n_years, submission_venueid)``
        and returning a dict with at least a ``domains`` entry
    :type policy_function: function

    :return: function with signature ``(profile, n_years=None, submission_venueid=None)``
    :rtype: function
    """
    def inner(profile, n_years=None, submission_venueid=None):
        common_domains = ['gmail.com', 'qq.com', '126.com', '163.com',
                          'outlook.com', 'hotmail.com', 'yahoo.com', 'foxmail.com', 'aol.com', 'msn.com', 'ymail.com', 'googlemail.com', 'live.com']

        # The venue id is only passed on to policies that declare a parameter for it.
        accepted_args = inspect.getfullargspec(policy_function).args
        if 'submission_venueid' not in accepted_args:
            result = policy_function(profile, n_years)
        else:
            result = policy_function(profile, n_years, submission_venueid)

        expanded_domains = set()
        subdomains_by_domain = {}
        for domain in result['domains']:
            # Each distinct domain is expanded only once.
            if domain not in subdomains_by_domain:
                subdomains_by_domain[domain] = openreview.tools.subdomains(domain)
            expanded_domains.update(subdomains_by_domain[domain])

        # Filter common domains
        expanded_domains.difference_update(common_domains)

        result['domains'] = list(expanded_domains)
        return result

    return inner
def get_conflicts(author_profiles, user_profile, policy='default', n_years=None):
    """
    Finds conflicts between the passed user Profile and the author Profiles passed as arguments

    :param author_profiles: List of Profiles for which an association is to be found
    :type author_profiles: list[Profile]
    :param user_profile: Profile for which the conflicts will be found
    :type user_profile: Profile
    :param policy: Policy can be either a function or a string. If it is a function, it is called once per
        Profile (with the Profile and ``n_years``) to extract that Profile's conflict information.
        The strings ``'NeurIPS'`` and ``'Comprehensive'`` select the corresponding built-in policies;
        any other value selects the default policy.
    :type policy: str or function, optional
    :param n_years: Number of years to be considered for conflict detection.
    :type n_years: int, optional

    :return: List containing all the conflicts between the user Profile and the author Profiles
    :rtype: list[str]
    """
    author_ids, author_domains, author_emails = set(), set(), set()
    author_relations, author_publications = set(), set()

    # Resolve the policy to a plain function first, then wrap it exactly once.
    if callable(policy):
        policy_function = policy
    elif policy == 'NeurIPS':
        policy_function = get_neurips_profile_info
    elif policy == 'Comprehensive':
        policy_function = get_comprehensive_profile_info
    else:
        policy_function = get_profile_info
    info_function = info_function_builder(policy_function)

    # Pool the information of all the authors together.
    for author_profile in author_profiles:
        info = info_function(author_profile, n_years)
        author_ids.add(info['id'])
        author_domains.update(info['domains'])
        author_emails.update(info['emails'])
        author_relations.update(info['relations'])
        author_publications.update(info['publications'])

    user_info = info_function(user_profile, n_years)
    user_id = user_info['id']

    conflicts = set()
    # The user is one of the authors
    conflicts |= author_ids.intersection(set([user_id]))
    # Shared domains
    conflicts |= author_domains.intersection(user_info['domains'])
    # An author lists the user as a relation, or the user lists an author
    conflicts |= author_relations.intersection([user_id])
    conflicts |= author_ids.intersection(user_info['relations'])
    # Shared emails and shared publications
    conflicts |= author_emails.intersection(user_info['emails'])
    conflicts |= author_publications.intersection(user_info['publications'])

    return list(conflicts)
def get_profile_info(profile, n_years=None):
    """
    Gets the domains, relations and publications associated with a Profile

    Domains come from every email address of the Profile and from every history entry that has no
    end year or ends after the cut-off year. The returned ``emails`` set is always empty.

    :param profile: Profile from which all the relations will be obtained
    :type profile: Profile
    :param n_years: Number of years to consider when getting the profile information. When not given
        (or 0) no cut-off is applied.
    :type n_years: int, optional

    :return: Dictionary with the id, domains, emails, relations and publications associated with the passed Profile
    :rtype: dict
    """
    domains = set()
    # Never populated by this policy; kept so the result has the same keys as the other policies.
    emails = set()

    if n_years:
        cut_off_date = datetime.datetime.now() - datetime.timedelta(days=365 * n_years)
        cut_off_year = cut_off_date.year
    else:
        # -1 is below any year, so nothing gets filtered out.
        cut_off_year = -1

    ## Emails section
    for email in profile.content['emails']:
        if '@' in email:
            domains.add(email.split('@')[1])
        else:
            print('Profile with invalid email:', profile.id, email)

    ## Institution section
    for history in profile.content.get('history', []):
        try:
            end = int(history.get('end', 0) or 0)
        except (TypeError, ValueError, OverflowError):
            # An end year that cannot be parsed is treated like a missing one.
            end = 0
        if not end or end > cut_off_year:
            domains.add(history.get('institution', {}).get('domain', ''))

    ## Relations section
    relations = filter_relations_by_year(profile.content.get('relations', []), cut_off_year)

    ## Publications section: get publications within last n years, default is all publications from previous years
    publications = filter_publications_by_year(profile.content.get('publications', []), cut_off_year)

    return {
        'id': profile.id,
        'domains': domains,
        'emails': emails,
        'relations': relations,
        'publications': publications
    }
def get_neurips_profile_info(profile, n_years=None):
    """
    Gets the domains, relations and publications associated with a Profile, following the NeurIPS policy

    Domains come from the history entries that have no end year or end after the cut-off year,
    skipping positions whose name contains "intern". Email domains are only used when the history
    produced no domain at all. The returned ``emails`` set is always empty.

    :param profile: Profile from which all the relations will be obtained
    :type profile: Profile
    :param n_years: Number of years to consider when getting the profile information. When not given
        (or 0) no cut-off is applied.
    :type n_years: int, optional

    :return: Dictionary with the id, domains, emails, relations and publications associated with the passed Profile
    :rtype: dict
    """
    domains = set()
    # Never populated by this policy; kept so the result has the same keys as the other policies.
    emails = set()

    if n_years:
        cut_off_date = datetime.datetime.now() - datetime.timedelta(days=365 * n_years)
        cut_off_year = cut_off_date.year
    else:
        # -1 is below any year, so nothing gets filtered out.
        cut_off_year = -1

    ## Institution section, get history within the last n years, excluding internships
    for h in profile.content.get('history', []):
        position = h.get('position')
        if not position or (isinstance(position, str) and 'intern' not in position.lower()):
            try:
                end = int(h.get('end', 0) or 0)
            except (TypeError, ValueError, OverflowError):
                # An end year that cannot be parsed is treated like a missing one.
                end = 0
            if not end or end > cut_off_year:
                domains.add(h.get('institution', {}).get('domain', ''))

    ## Relations section, get coauthor/coworker relations within the last n years + all the other relations
    relations = filter_relations_by_year(profile.content.get('relations', []), cut_off_year, ['Coauthor','Coworker'])

    ## if institution section is empty, add email domains
    if not domains:
        for email in profile.content['emails']:
            if '@' in email:
                domains.add(email.split('@')[1])
            else:
                print('Profile with invalid email:', profile.id, email)

    ## Publications section: get publications within last n years
    publications = filter_publications_by_year(profile.content.get('publications', []), cut_off_year)

    return {
        'id': profile.id,
        'domains': domains,
        'emails': emails,
        'relations': relations,
        'publications': publications
    }
def get_comprehensive_profile_info(profile, n_years=None):
    """
    Gets the domains, relations and publications associated with a Profile, following the Comprehensive policy

    Domains come from the history entries that have no end year or end after the cut-off year;
    entries whose position is set but is not a string are skipped. Email domains are only used
    when the history produced no domain at all. The returned ``emails`` set is always empty.

    :param profile: Profile from which all the relations will be obtained
    :type profile: Profile
    :param n_years: Number of years to consider when getting the profile information. When not given
        (or 0) no cut-off is applied.
    :type n_years: int, optional

    :return: Dictionary with the id, domains, emails, relations and publications associated with the passed Profile
    :rtype: dict
    """
    domains = set()
    # Never populated by this policy; kept so the result has the same keys as the other policies.
    emails = set()

    if n_years:
        cut_off_date = datetime.datetime.now() - datetime.timedelta(days=365 * n_years)
        cut_off_year = cut_off_date.year
    else:
        # -1 is below any year, so nothing gets filtered out.
        cut_off_year = -1

    ## Institution section, get history within the last n years
    for h in profile.content.get('history', []):
        position = h.get('position')
        if not position or isinstance(position, str):
            try:
                end = int(h.get('end', 0) or 0)
            except (TypeError, ValueError, OverflowError):
                # An end year that cannot be parsed is treated like a missing one.
                end = 0
            if not end or end > cut_off_year:
                domains.add(h.get('institution', {}).get('domain', ''))

    ## Relations section: the year filter is requested for Coauthor/Coworker relations only
    ## NOTE(review): the previous comment said "all relations within the last n years" - confirm which is intended
    relations = filter_relations_by_year(profile.content.get('relations', []), cut_off_year, ['Coauthor','Coworker'])

    ## if institution section is empty, add email domains
    if not domains:
        for email in profile.content['emails']:
            if '@' in email:
                domains.add(email.split('@')[1])
            else:
                print('Profile with invalid email:', profile.id, email)

    ## Publications section: get publications within last n years
    publications = filter_publications_by_year(profile.content.get('publications', []), cut_off_year)

    return {
        'id': profile.id,
        'domains': domains,
        'emails': emails,
        'relations': relations,
        'publications': publications
    }
def get_current_submissions_profile_info(profile, n_years=None, submission_venueid=None):
    """
    Gets the domains and relations of a Profile, and only the publications submitted to the current venue

    Domains come from the history entries that have no end year or end after the cut-off year,
    skipping positions whose name contains "intern". Email domains are not used and the returned
    ``emails`` set is always empty.

    :param profile: Profile from which all publications will be obtained
    :type profile: Profile
    :param n_years: Number of years to consider for history and relations. When None no cut-off is applied.
    :type n_years: int, optional
    :param submission_venueid: venue id of the submissions we want to obtain; a publication is kept when
        its ``venueid`` content field is a dict whose ``value`` equals this id
    :type submission_venueid: str, optional

    :return: Dictionary with the id, domains, emails, relations and current publications associated with the passed Profile
    :rtype: dict
    """
    domains = set()
    publications = set()

    if n_years is not None:
        cut_off_date = datetime.datetime.now() - datetime.timedelta(days=365 * n_years)
        cut_off_year = cut_off_date.year
    else:
        # -1 is below any year, so nothing gets filtered out.
        cut_off_year = -1

    ## Institution section, get history within the last n years, excluding internships
    for h in profile.content.get('history', []):
        position = h.get('position')
        if not position or (isinstance(position, str) and 'intern' not in position.lower()):
            try:
                end = int(h.get('end', 0) or 0)
            except (TypeError, ValueError, OverflowError):
                # An end year that cannot be parsed is treated like a missing one.
                end = 0
            if not end or end > cut_off_year:
                domains.add(h.get('institution', {}).get('domain', ''))

    ## Relations section, get coauthor/coworker relations within the last n years + all the other relations
    relations = filter_relations_by_year(profile.content.get('relations', []), cut_off_year, ['Coauthor','Coworker'])

    ## Get publications
    for publication in profile.content.get('publications', []):
        venueid = publication.content.get('venueid')
        if isinstance(venueid, dict) and publication.content['venueid']['value'] == submission_venueid:
            publications.add(publication.id)

    return {
        'id': profile.id,
        'domains': domains,
        'emails': set(),
        'relations': relations,
        'publications': publications
    }
def filter_publications_by_year(publications, cut_off_year):
    """
    Returns the ids of the publications whose year is greater than the cut-off year

    The year of a publication is taken from, in this order:

    1. its publication date (``pdate``)
    2. the ``year`` content field (a string, or a dict with a ``value``), ignored when it is in the future
    3. its creation date (``cdate``, or ``tcdate`` when ``cdate`` is not set)

    Publications for which no year can be determined are left out.

    :param publications: publications to filter
    :type publications: list[Note]
    :param cut_off_year: only publications with a year strictly greater than this value are kept
    :type cut_off_year: int

    :return: ids of the publications that passed the filter
    :rtype: set[str]
    """
    def extract_year(publication_id, timestamp):
        # Timestamps are in milliseconds; a missing or out-of-range value yields None.
        try:
            return int(datetime.datetime.fromtimestamp(timestamp/1000).year)
        except (TypeError, ValueError, OverflowError, OSError):
            print('Error extracting the date for publication: ', publication_id)
            return None

    filtered_publications = set()
    current_year = datetime.datetime.now().year
    for publication in publications:
        year = None
        if publication.pdate:
            year = extract_year(publication.id, publication.pdate)
        if not year and 'year' in publication.content:
            unformatted_year = None
            content_year = publication.content['year']
            if isinstance(content_year, dict) and 'value' in content_year:
                unformatted_year = content_year['value']
            elif isinstance(content_year, str):
                unformatted_year = content_year
            try:
                converted_year = int(unformatted_year)
                # A year in the future is not trusted; fall through to the creation date.
                if converted_year <= current_year:
                    year = converted_year
            except (TypeError, ValueError, OverflowError):
                year = None
        if not year:
            year = extract_year(publication.id, publication.cdate if publication.cdate else publication.tcdate)
        if year and year > cut_off_year:
            filtered_publications.add(publication.id)

    return filtered_publications


def filter_relations_by_year(relations, cut_off_year, only_relations=None):
    """
    Returns the identifiers of the relations that are still relevant after the cut-off year

    A relation is identified by its ``profile_id``, else its ``username``, else its ``email``;
    relations without any identifier are skipped. A relation is kept when it has no parseable
    ``end`` year or ends after ``cut_off_year``. When ``only_relations`` is given, the year filter
    applies only to those relation types and every other relation is always kept.

    :param relations: relation entries of a Profile
    :type relations: list[dict]
    :param cut_off_year: relations ending in this year or earlier are dropped
    :type cut_off_year: int
    :param only_relations: relation types the year filter is restricted to
    :type only_relations: list[str], optional

    :return: identifiers of the relations that passed the filter
    :rtype: set[str]
    """
    filtered_relations = set()
    for r in relations:
        relation_id = r.get('profile_id', r.get('username', r.get('email')))
        if not relation_id:
            continue

        try:
            end = int(r.get('end'))
        except (TypeError, ValueError):
            # Missing or non-numeric end year: the relation counts as ongoing.
            end = None

        if only_relations is None or r.get('relation', '') in only_relations:
            if end is None or end > cut_off_year:
                filtered_relations.add(relation_id)
        else:
            # Relation types outside only_relations are never filtered by year.
            filtered_relations.add(relation_id)

    return filtered_relations
def post_bulk_edges(client, edges, batch_size = 50000):
    """Post a large list of Edges in batches with a progress bar.

    Splits the edge list into chunks of ``batch_size`` and posts each chunk via
    ``client.post_edges()``. Returns all posted Edge objects.

    :param client: Client used to post the Edges.
    :type client: Client
    :param edges: List of Edge objects to post.
    :type edges: list[Edge]
    :param batch_size: Number of edges per batch. Default: 50000.
    :type batch_size: int, optional

    :return: List of all posted Edge objects across all batches.
    :rtype: list[Edge]
    """
    # The range itself defines how many batches there are, so its length is the exact
    # progress-bar total (also when the edge count is a multiple of batch_size, or zero).
    batch_starts = range(0, len(edges), batch_size)

    result = []
    for start in tqdm(batch_starts, total=len(batch_starts)):
        # Slicing past the end of the list is safe, the last batch is simply shorter.
        result.extend(client.post_edges(edges[start:start + batch_size]))
    return result
def post_bulk_tags(client, tags, batch_size = 50000):
    """Post a large list of Tags in batches with a progress bar.

    Splits the tag list into chunks of ``batch_size`` and posts each chunk via
    ``client.post_tags()``. Returns all posted Tag objects.

    :param client: Client used to post the Tags.
    :type client: Client
    :param tags: List of Tag objects to post.
    :type tags: list[Tag]
    :param batch_size: Number of tags per batch. Default: 50000.
    :type batch_size: int, optional

    :return: List of all posted Tag objects across all batches.
    :rtype: list[Tag]
    """
    # The range itself defines how many batches there are, so its length is the exact
    # progress-bar total (also when the tag count is a multiple of batch_size, or zero).
    batch_starts = range(0, len(tags), batch_size)

    result = []
    for start in tqdm(batch_starts, total=len(batch_starts)):
        # Slicing past the end of the list is safe, the last batch is simply shorter.
        result.extend(client.post_tags(tags[start:start + batch_size]))
    return result
def overwrite_pdf(client, note_id, file_path):
    """
    Overwrite all the references of a note with the new pdf file.
    If the note has an original note then the references of the original note are updated instead.

    The file is uploaded once, and only when the note has references. Every reference that
    already has a ``pdf`` content field is pointed to the uploaded file and posted again.

    :param client: Client used to read the note, upload the file and post the references
    :type client: Client
    :param note_id: ID of the note whose pdf is replaced
    :type note_id: str
    :param file_path: path of the new pdf file
    :type file_path: str

    :return: the references as returned by ``client.post_note`` after being updated
    :rtype: list[Note]
    """
    target_note = client.get_note(id=note_id)
    if target_note.original:
        target_note = client.get_note(id=target_note.original)

    references = client.get_references(referent=target_note.id)
    invitation_id = target_note.invitation

    if not references:
        # Nothing to update, so the file is not uploaded either.
        return []

    pdf_url = client.put_attachment(file_path, invitation_id, 'pdf')

    updated_references = []
    for reference in references:
        if 'pdf' not in reference.content:
            continue
        reference.content['pdf'] = pdf_url
        updated_references.append(client.post_note(reference))

    return updated_references
def pretty_id(group_id):
    """Return a human-readable label for a group or profile id.

    Tilde ids (e.g. ``~Jane_Doe1``) lose the tilde and all digits, and their
    underscores become spaces. The reserved ids ``everyone``, ``(anonymous)``,
    ``(guest)`` and ``~`` are returned unchanged. Any other id is split on
    ``/``; each token is cut at its first ``.``, stripped of ``-`` and has
    ``_`` turned into spaces. Empty tokens and tokens made only of lowercase
    letters are dropped, and the rest are joined with spaces.

    :param group_id: Group or profile id.
    :type group_id: str

    :return: The prettified id, or ``''`` for an empty/None id.
    :rtype: str
    """
    if not group_id:
        return ''

    # Require more than the bare tilde: the previous ``len(group_id)`` test was
    # always true, which made the '~' entry of the reserved list unreachable.
    if group_id.startswith('~') and len(group_id) > 1:
        return re.sub('[0-9]+', '', group_id.replace('~', '').replace('_', ' '))

    if group_id in ['everyone', '(anonymous)', '(guest)', '~']:
        return group_id

    transformed_tokens = []
    for token in group_id.split('/'):
        transformed_token = re.sub(r'\..+', '', token).replace('-', '').replace('_', ' ')
        letters_only = re.sub(r'\d|\W', '', transformed_token)
        has_no_ascii = not re.search(r'[a-zA-Z0-9]', transformed_token)
        if letters_only != transformed_token.lower() or (has_no_ascii and transformed_token):
            transformed_tokens.append(transformed_token)

    return ' '.join(transformed_tokens)


def export_committee(client, committee_id, file_name):
    """Write the preferred email and pretty preferred name of every committee member to a CSV file.

    :param client: Client used to read the group and the profiles.
    :param committee_id: Id of the group whose members are exported.
    :param file_name: Path of the CSV file to write.
    """
    members = client.get_group(committee_id).members
    profiles = get_profiles(client, members)
    # The csv module requires newline='' so that it controls line endings itself;
    # without it, platforms that translate '\n' emit blank rows between records.
    with open(file_name, 'w', newline='') as outfile:
        csvwriter = csv.writer(outfile, delimiter=',')
        for profile in tqdm(profiles):
            csvwriter.writerow([profile.get_preferred_email(), profile.get_preferred_name(pretty=True)])


def get_own_reviews(client):
    """Collect links to the public reviews written by the client's user on public submissions.

    API v1 notes are fetched with ``tauthor=True`` and kept when their
    invitation contains ``Official_Review``. API v2 notes are fetched by the
    signature of the client's profile and kept when one of their invitations
    contains the review invitation name configured in the domain group
    content (``review_name``). In both cases the review and its submission
    must be readable by ``everyone``.

    :param client: Authenticated client; its token is reused for both API versions.

    :return: One dict per review with the keys ``submission_title``,
        ``submission_link`` and ``review_link``.
    :rtype: list[dict]
    """
    baseurl_v1, baseurl_v2 = get_base_urls(client)

    client_v1 = openreview.Client(baseurl=baseurl_v1, token=client.token)
    client_v2 = openreview.api.OpenReviewClient(baseurl=baseurl_v2, token=client.token)

    # Get all the reviews from v1
    notes_v1 = client_v1.get_all_notes(tauthor=True)

    submissions_and_official_reviews = []

    # Filter Official Reviews
    for note in notes_v1:
        # Make sure that the Official Review is public
        if 'Official_Review' not in note.invitation or 'everyone' not in note.readers:
            continue

        # Make sure that the submission is public
        submission = client_v1.get_note(note.forum)
        if 'everyone' not in submission.readers:
            continue

        # Add both submission and note
        submissions_and_official_reviews.append((submission, note, 1))

    # Get all the reviews from v2
    profile_id = 'Guest' if not getattr(client, 'profile') else getattr(getattr(client, 'profile'), 'id')
    if profile_id == 'Guest':
        notes_v2 = []
    else:
        notes_v2 = client_v2.get_all_notes(signature=profile_id, transitive_members=True)

    # TMLR was created before the invitation names were added to the
    # group content, so we need to hardcode it
    domain_to_reviewer_invitation_suffix = {
        'TMLR': '/-/Review'
    }

    # Filter Official Reviews
    for note in notes_v2:
        domain = note.domain

        # Get review invitation name from domain group content
        if domain_to_reviewer_invitation_suffix.get(domain) is None:
            group = client_v2.get_group(domain)
            group_content = getattr(group, 'content', None)
            review_name = None
            # An empty (but not None) content used to fall through and fail on
            # string concatenation; treat it like a missing review name.
            if group and group_content:
                review_name = group_content.get('review_name', {}).get('value', None)
            if review_name is None:
                continue
            domain_to_reviewer_invitation_suffix[domain] = '/-/' + review_name

        reviewer_invitation_suffix = domain_to_reviewer_invitation_suffix[domain]

        # Make sure that the note is an Official Review and that it is public
        is_official_review = any(reviewer_invitation_suffix in invitation for invitation in note.invitations)
        if not is_official_review or 'everyone' not in note.readers:
            continue

        # Make sure that the submission is public
        submission = client_v2.get_note(note.forum)
        if 'everyone' not in submission.readers:
            continue

        # Add both submission and note
        submissions_and_official_reviews.append((submission, note, 2))

    links = []
    for submission, official_review, version in submissions_and_official_reviews:
        # Links always point at the production site, whatever the client's baseurl.
        submission_link = f'https://openreview.net/forum?id={submission.id}'
        review_link = f'https://openreview.net/forum?id={submission.id}&noteId={official_review.id}'
        if version == 1:
            # v1 content values are stored directly
            submission_title = submission.content.get('title', '')
        else:
            # v2 content values are wrapped in {'value': ...}
            submission_title = submission.content.get('title', {}).get('value', '')
        links.append({
            'submission_title': submission_title,
            'submission_link': submission_link,
            'review_link': review_link
        })

    return links


def get_base_urls(client):
    """Return ``[api_v1_url, api_v2_url]`` for the environment of ``client.baseurl``.

    :param client: Client whose ``baseurl`` identifies the environment.

    :return: The dev, prod or local pair of API base urls.
    :rtype: list[str]
    """
    env = _identify_environment(client.baseurl)
    if env == 'dev':
        return [DEV_API_V1, DEV_API_V2]
    if env == 'prod':
        return [PROD_API_V1, PROD_API_V2]
    return [LOCAL_API_V1, LOCAL_API_V2]


def get_site_url(client):
    """Return the web site url for the environment of ``client.baseurl``.

    :param client: Client whose ``baseurl`` identifies the environment.

    :return: The dev, prod or local site url.
    :rtype: str
    """
    env = _identify_environment(client.baseurl)
    if env == 'dev':
        return DEV_SITE
    if env == 'prod':
        return PROD_SITE
    return LOCAL_SITE


def resend_emails(client, request_id, groups):
    """Post a new message request to ``groups`` reusing the fields of an existing request.

    The subject and message are always copied; ``signature``, ``invitation``,
    ``ignoreRecipients``, ``replyTo`` and ``parentGroup`` are forwarded when
    present, and ``fromName``/``fromEmail`` are forwarded inside ``sender``.

    :param client: Client used to read and post the message request.
    :param request_id: Id of the message request to resend.
    :param groups: Recipients passed to ``client.post_message_request``.
    :raises AssertionError: If the request id does not resolve to exactly one request.
    """
    message_requests = client.get_message_requests(id=request_id)
    assert len(message_requests) == 1, 'Request not found'

    message_request = message_requests[0]

    message_request_optional_params = { 'sender': {} }

    # Fields forwarded under the same name
    for key in ('signature', 'invitation', 'ignoreRecipients', 'replyTo', 'parentGroup'):
        if key in message_request:
            message_request_optional_params[key] = message_request[key]

    # Fields that the new request expects nested under 'sender'
    for key in ('fromName', 'fromEmail'):
        if key in message_request:
            message_request_optional_params['sender'][key] = message_request[key]

    client.post_message_request(message_request['subject'], groups, message_request['message'], **message_request_optional_params)


def get_invitation_source(invitation, domain):
    """Build the source filter of an invitation from its content and the domain settings.

    The result starts from ``invitation.content['source']['value']`` (default:
    ``{'venueid': <submission_venue_id>}``) and is then extended with the
    deprecated ``reply_to`` and ``source_submissions_query`` content fields.
    The returned dict is a copy: ``invitation.content`` is never modified.

    :param invitation: Invitation whose ``content`` describes the source.
    :param domain: Domain group providing ``id`` and ``content`` settings.

    :return: The source filter, or ``{}`` when the invitation has no content.
    :rtype: dict
    """
    submission_venue_id = domain.content.get('submission_venue_id', {}).get('value', None)
    venue_id = domain.id
    review_name = domain.content.get('review_name', {}).get('value', None)
    meta_review_name = domain.content.get('meta_review_name', {}).get('value', None)
    rebuttal_name = domain.content.get('rebuttal_name', {}).get('value', None)

    default_source = { 'venueid': submission_venue_id }
    source = invitation.content.get('source', { 'value': default_source }).get('value', default_source) if invitation.content else {}

    if isinstance(source, dict):
        # Work on a copy so the legacy fields merged below do not leak back
        # into the dict stored inside invitation.content.
        source = dict(source)

    ## Deprecated, use source as dictionary
    if isinstance(source, str):
        legacy_sources = {
            'all_submissions': { 'venueid': submission_venue_id },
            'accepted_submissions': { 'venueid': [venue_id, submission_venue_id], 'with_decision_accept': True },
            'public_submissions': { 'venueid': submission_venue_id, 'readers': ['everyone'] },
            'flagged_for_ethics_review': { 'venueid': submission_venue_id, 'content': { 'flagged_for_ethics_review': True } }
        }
        # Unknown names are left untouched, as before
        source = legacy_sources.get(source, source)
    ##

    ## Deprecated, use source instead
    reply_to = invitation.content.get('reply_to', {}).get('value', 'forum') if invitation.content else False
    if isinstance(reply_to, str):
        if reply_to == 'reviews':
            source['reply_to'] = review_name
        elif reply_to == 'metareviews':
            source['reply_to'] = meta_review_name
        elif reply_to == 'rebuttals':
            source['reply_to'] = rebuttal_name
        elif not (reply_to == 'forum' or reply_to == 'withForum'):
            source['reply_to'] = reply_to
    ##

    ## Deprecated, use source instead
    source_submissions_query = invitation.content.get('source_submissions_query', {}).get('value', {}) if invitation.content else {}
    if source_submissions_query:
        # Merge into a fresh dict so the nested 'content' filter of the
        # invitation is not mutated either.
        merged_content = dict(source['content']) if 'content' in source else {}
        merged_content.update(source_submissions_query)
        source['content'] = merged_content
    ##

    return source
def should_match_invitation_source(client, invitation, submission, note=None, domain=None):
    """
    Checks if the invitation source matches the submission and note.

    The source is computed with ``get_invitation_source`` and the submission
    (and optional reply ``note``) must satisfy every filter it contains:
    ``venueid``, ``reply_to``, ``readers``, ``content`` and
    ``with_decision_accept``. The content keys of ``invitation.edit`` must
    also be compatible with a forum invitation (or a reply invitation when
    ``note`` is given).

    :param client: Client used to fetch the domain group and decision notes when needed.
    :param invitation: Invitation whose source is evaluated.
    :param submission: Submission note being checked.
    :param note: Optional reply note; required when the source has ``reply_to``.
    :param domain: Domain group; fetched from ``submission.domain`` when omitted.

    :return: True if the invitation applies to the submission (and note).
    :rtype: bool
    """
    if domain is None:
        domain = client.get_group(submission.domain)

    source = get_invitation_source(invitation, domain)
    if not source:
        return False

    # NOTE(review): when the source venueid is a string this is a substring
    # test, not an equality test - confirm whether callers rely on that.
    if submission.content['venueid']['value'] not in source.get('venueid', []):
        return False

    if 'reply_to' in source and not note:
        return False

    if 'reply_to' in source and note and not note.invitations[0].endswith(f'/-/{source.get("reply_to")}'):
        return False

    if 'reply_to' not in source and note:
        return False

    if 'readers' in source and not set(source['readers']).issubset(set(submission.readers)):
        return False

    if 'content' in source:
        for key, value in source.get('content', {}).items():
            if value != submission.content.get(key, {}).get('value'):
                return False

    if 'with_decision_accept' in source:
        with_decision_accept = source.get('with_decision_accept')
        print('checking decision accept for submission', submission.id, 'with_decision_accept', with_decision_accept)
        decision_invitation_id = f'{domain.id}/{domain.content["submission_name"]["value"]}{submission.number}/-/{domain.content.get("decision_name", {}).get("value", "Decision")}'

        # details is None when the submission was fetched without details;
        # treat that like "no replies loaded" and query the API instead of
        # failing on None.get().
        details = submission.details or {}
        replies = details.get('replies', details.get('directReplies'))
        if replies is None:
            decision_notes = client.get_notes(forum=submission.id, invitation=decision_invitation_id)
        else:
            decision_notes = [openreview.api.Note.from_json(reply) for reply in replies if reply['invitations'][0] == decision_invitation_id]

        if not decision_notes:
            return False

        accept_options = domain.content.get('accept_decision_options', {}).get('value')
        decision_value = decision_notes[0].content[domain.content.get('decision_field_name', {}).get('value', 'decision')]['value']
        if is_accept_decision(decision_value, accept_options) != with_decision_accept:
            return False

    content_keys = invitation.edit.get('content', {}).keys()

    # Invitations parameterized by any of these keys are never matched here
    if 'withdrawalId' in content_keys:
        return False

    if 'deskRejectionId' in content_keys:
        return False

    if 'noteReaders' in content_keys:
        return False

    # A parameterized invitation must accept both the note id and number
    if content_keys and 'noteId' not in content_keys:
        return False

    if content_keys and 'noteNumber' not in content_keys:
        return False

    # Reply invitations must accept the id of the note being replied to
    if note and 'replyto' not in content_keys:
        return False

    return True
def is_forum_invitation(invitation):
    """Tell whether an invitation is parameterized per forum.

    True when the content keys of ``invitation.edit`` include ``noteId`` and
    ``noteNumber`` but not ``replyto``.

    :param invitation: Invitation whose ``edit`` dict is inspected.
    :rtype: bool
    """
    keys = invitation.edit.get('content', {}).keys()
    return 'noteId' in keys and 'noteNumber' in keys and 'replyto' not in keys


def create_replyto_invitations(client, submission, note):
    """Instantiate every active venue invitation whose source matches a reply note.

    For each matching invitation an invitation edit is posted with the forum,
    submission number and reply id, plus the optional parameters
    (``replytoSignatures``, ``replyNumber``, ``invitationPrefix``,
    ``replytoReplytoSignatures``) that the invitation declares.

    :param client: Client used to list invitations and post the edits.
    :param submission: Submission the note belongs to.
    :param note: Reply note the new invitations point to.
    """
    all_invitations = client.get_all_invitations(prefix=note.domain + '/-/', type='invitation', domain=note.domain)
    active_invitations = [candidate for candidate in all_invitations if candidate.is_active()]

    for invitation in active_invitations:
        print('processing invitation: ', invitation.id)

        if not should_match_invitation_source(client, invitation, submission, note):
            print('skipping invitation: ', invitation.id, ' - does not match source')
            continue

        print('create invitation: ', invitation.id)
        edit_content = {
            'noteId': { 'value': note.forum },
            'noteNumber': { 'value': submission.number },
            'replyto': { 'value': note.id }
        }

        # Only pass the optional parameters that the invitation declares
        declared = invitation.edit.get('content', {}).keys()
        if 'replytoSignatures' in declared:
            edit_content['replytoSignatures'] = { 'value': note.signatures[0] }
        if 'replyNumber' in declared:
            edit_content['replyNumber'] = { 'value': note.number }
        if 'invitationPrefix' in declared:
            edit_content['invitationPrefix'] = { 'value': note.invitations[0].replace('/-/', '/') + str(note.number) }
        if 'replytoReplytoSignatures' in declared:
            parent = client.get_note(note.replyto)
            edit_content['replytoReplytoSignatures'] = { 'value': parent.signatures[0] }

        client.post_invitation_edit(
            invitations=invitation.id,
            content=edit_content,
            invitation=openreview.api.Invitation()
        )


def create_forum_invitations(client, submission):
    """Synchronize the per-forum invitations of a submission with the venue invitations.

    Considers the active venue invitations that have date processes. A
    matching invitation is instantiated for the submission; a non-matching
    forum invitation has its existing instances on the submission forum
    soft-deleted by posting an edit with ``ddate`` set to the current time.

    :param client: Client used to list invitations and post the edits.
    :param submission: Submission whose forum invitations are synchronized.
    """
    all_invitations = client.get_all_invitations(prefix=submission.domain + '/-/', type='invitation', domain=submission.domain)
    candidates = [candidate for candidate in all_invitations if candidate.is_active() and candidate.date_processes]

    for invitation in candidates:
        print('processing invitation: ', invitation.id)

        if should_match_invitation_source(client, invitation, submission):
            print('create invitation: ', invitation.id)
            client.post_invitation_edit(
                invitations=invitation.id,
                content={
                    'noteId': { 'value': submission.id },
                    'noteNumber': { 'value': submission.number }
                },
                invitation=openreview.api.Invitation()
            )
            continue

        print('skipping invitation: ', invitation.id, ' - does not match source')
        if not is_forum_invitation(invitation):
            continue

        # The source no longer matches: remove the instances created earlier
        existing = client.get_invitations(replyForum=submission.id, invitation=invitation.id)
        for stale in existing:
            print('delete invitation: ', stale.id)
            client.post_invitation_edit(
                invitations=f'{submission.domain}/-/Edit',
                signatures=[submission.domain],
                invitation=openreview.api.Invitation(
                    id=stale.id,
                    ddate=openreview.tools.datetime_millis(datetime.datetime.now())
                )
            )


def singularize(word):
    """Return a naive singular form of ``word``.

    The first matching rule applies: ``ies`` becomes ``y``, a trailing ``es``
    is removed, a trailing ``s`` is removed. Words matching none are returned
    unchanged. The ``es`` rule is purely textual, e.g. ``notes`` gives ``not``.

    :param word: Word to singularize.
    :type word: str
    :rtype: str
    """
    suffix_rules = (('ies', 'y'), ('es', ''), ('s', ''))
    for suffix, replacement in suffix_rules:
        if word.endswith(suffix):
            return word[:-len(suffix)] + replacement
    return word
def percentile(data, percent):
    """Return the percentile value from *data* using linear interpolation,
    matching the behaviour of numpy.percentile with the default 'linear' method.

    *percent* may be an int or float in [0, 100].
    *data* must be a non-empty sequence of numbers.

    :param data: Non-empty sequence of numbers; it is not modified.
    :param percent: Percentile to compute, in [0, 100].

    :return: The interpolated value at the requested percentile.
    :raises ValueError: If *data* is empty or *percent* is outside [0, 100].
    """
    if not data:
        raise ValueError("data must be non-empty")

    # Out-of-range values used to be accepted silently: negative percents
    # extrapolated below the minimum and percents above 100 were clamped.
    # The chained comparison also rejects NaN.
    if not 0 <= percent <= 100:
        raise ValueError("percent must be in the range [0, 100]")

    sorted_data = sorted(data)
    n = len(sorted_data)
    if n == 1:
        return sorted_data[0]

    # NumPy linear interpolation: index = percent/100 * (n - 1)
    idx = percent / 100.0 * (n - 1)
    lo = int(idx)
    hi = lo + 1
    if hi >= n:
        # percent == 100 lands exactly on the last element
        return sorted_data[-1]

    frac = idx - lo
    return sorted_data[lo] + frac * (sorted_data[hi] - sorted_data[lo])