diff --git a/.ackrc b/.ackrc index 0e0073a801..44d645a5d2 100644 --- a/.ackrc +++ b/.ackrc @@ -3,4 +3,5 @@ --ignore-directory=is:.mypy_cache --ignore-directory=is:.pytest_cache --ignore-directory=is:.ruff_cache +--ignore-directory=is:.venv --ignore-directory=is:site diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index 3e1c9acd8f..2fc1f377fb 100644 --- a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -24,7 +24,6 @@ jobs: fail-fast: false matrix: python-version: ["3.10", "3.11", "3.12", "3.13"] - poetry-version: ["2.1.3"] os: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.os }} steps: @@ -35,9 +34,8 @@ jobs: with: python-version: ${{ matrix.python-version }} - name: Install Poetry - uses: abatilo/actions-poetry@v3 - with: - poetry-version: ${{ matrix.poetry-version }} + run: | + pipx install poetry --python python${{ matrix.python-version }} - name: Setup a local virtual environment (if no poetry.toml file) run: | poetry config virtualenvs.create true --local diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md index ff2aadaa04..3a155bb98e 100644 --- a/python/CHANGELOG.md +++ b/python/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## [0.5.4] — 2025-11-27 + +This release only exists to provide necessary functionality for transitioning to the [new author ID system](https://github.com/acl-org/acl-anthology/wiki/Author-Page-Plan). + +### Added + +- NameSpecification now provides an `orcid` field. + +### Changed + +- Updated the XML schema to match the data folder. + ## [0.5.3] — 2025-06-22 This release adds more functionality for ingesting new proceedings and modifying existing data. diff --git a/python/acl_anthology/data/schema.rnc b/python/acl_anthology/data/schema.rnc index dabcbd8bed..817d4cc60a 100644 --- a/python/acl_anthology/data/schema.rnc +++ b/python/acl_anthology/data/schema.rnc @@ -12,11 +12,14 @@ MarkupText = (text | b | i | url | fixed-case | tex-math )+ first = element first { xsd:string { pattern="(\S(.*\S)?)?" } } last = element last { xsd:string { pattern="\S(.*\S)?" } } affiliation = element affiliation { text } -orcid = element orcid { text } -google-scholar = element google-scholar{ text } -semantic-scholar = element semantic-scholar { text } Variant = element variant { attribute script { xsd:string }, (first? & last) } -Person = attribute id { xsd:NCName }?, (first? & last & Variant? & affiliation? & orcid? & google-scholar? & semantic-scholar? ) +# Store bare 16-digit hyphenated ORCID iD with checksum (last char may be X) +OrcidId = xsd:string { pattern="([0-9]{4}-){3}[0-9]{3}[0-9X]" } + +Person = + attribute id { xsd:NCName }?, + attribute orcid { OrcidId }?, + (first? & last & Variant? & affiliation?) local-filename = xsd:string { pattern="[A-Za-z0-9._\-]+" } bibtex-key = xsd:string { pattern="[A-Za-z0-9\-]+" } diff --git a/python/acl_anthology/people/name.py b/python/acl_anthology/people/name.py index 289787dbf0..6d8e1bad41 100644 --- a/python/acl_anthology/people/name.py +++ b/python/acl_anthology/people/name.py @@ -250,8 +250,9 @@ class NameSpecification: Attributes: name: The person's name. - id: Unique ID for the person that this name refers to. Defaults to `None`. - affiliation: Professional affiliation. Defaults to `None`. + id: Unique ID for the person that this name refers to. + orcid: An ORCID that was supplied together with this name. + affiliation: Professional affiliation. variants: Variant spellings of this name in different scripts. Note: @@ -263,6 +264,7 @@ class NameSpecification: name: Name = field(converter=_Name_from) id: Optional[str] = field(default=None, validator=v.optional(v.instance_of(str))) + orcid: Optional[str] = field(default=None, validator=v.optional(v.instance_of(str))) affiliation: Optional[str] = field( default=None, validator=v.optional(v.instance_of(str)) ) @@ -321,6 +323,7 @@ def from_xml(cls, person: etree._Element) -> NameSpecification: return cls( Name(first, cast(str, last)), id=person.get("id"), + orcid=person.get("orcid"), affiliation=affiliation, variants=variants, ) @@ -336,6 +339,8 @@ def to_xml(self, tag: str = "author") -> etree._Element: elem = etree.Element(tag) if self.id is not None: elem.set("id", self.id) + if self.orcid is not None: + elem.set("orcid", self.orcid) elem.extend( ( E.first(self.first) if self.first else E.first(), diff --git a/python/justfile b/python/justfile index 0e47f67d39..dcc05c2627 100644 --- a/python/justfile +++ b/python/justfile @@ -122,7 +122,7 @@ prepare-new-release VERSION: no-uncommitted-changes check test-all test-integrat echo "(To undo: git reset HEAD~ )" echo "" echo "Next steps:" - echo " 1. git tag py-v$VERSION && git push --tags" - echo " 2. poetry publish" - echo " 3. Create PR python-dev -> master" + echo " 1. Create PR to master, then after merging:" + echo " 2. git tag py-v$VERSION && git push --tags" + echo " 3. poetry publish" echo " 4. (optionally) Create a release on Github" diff --git a/python/pyproject.toml b/python/pyproject.toml index 965d39c201..a8e2b4037b 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -43,7 +43,7 @@ name = "acl-anthology" packages = [ { include = "acl_anthology" }, ] -version = "0.5.3" +version = "0.5.4" description = "A library for accessing the ACL Anthology" authors = [ { name = "Marcel Bollmann", email = "marcel@bollmann.me" },