A script to sort the entries of my BibTeX file sci.bib in their natural order, which is namexyα, with name the first author's last name, xy the last two digits of the year of publication and α a letter (a, b, c, etc.) to lift the degeneracy when the same author publishes more than one paper per year. This means that kavokin98a should appear before kavokin21a because 1998<2021, despite 98>21.
python orderbib.py sci.bib > sorted.bib
The total number of entries processed and the number of invalid entries (if any) are reported on stderr, e.g., when running it for the first time before switching to automatic insertions with doi2bib:
Total entries: 4405
Valid entries: 4405
Invalid entries: 0
First version v°1.0 on 8 September (2025).
# ___ _ ____ _ _
# / _ \ _ __ __| | ___ _ __| __ )(_) |__
#| | | | '__/ _` |/ _ \ '__| _ \| | '_ \
#| |_| | | | (_| | __/ | | |_) | | |_) |
# \___/|_| \__,_|\___|_| |____/|_|_.__/
# F.P. Laussy http://laussy.org/wiki/orderbib
# v°1.0 - Mon Sep 8 05:26:26 PM CEST 2025
# This reorders my bib, according to my rules!
import re
import sys
# Matches one ``@string{...}`` macro definition, up to its first closing brace.
_STRING_RE = re.compile(r'(@string\s*\{.*?}\s*)', re.DOTALL | re.IGNORECASE)

# Matches one non-@string entry. Groups: (whole entry, citation key, fields).
# An entry is only recognised when its closing brace is the first non-blank
# character on a line.
_ENTRY_RE = re.compile(
    r'(@(?!string\b)\w+\s*\{([^,]+),\s*(.*?)\n\s*\}\s*)',
    re.DOTALL | re.IGNORECASE,
)

# Matches the ``year`` field, whether the number is bare, in braces or in
# double quotes. The word boundary stops it from matching inside a longer
# field name that merely ends in "year".
_YEAR_RE = re.compile(r'\byear\s*=\s*[{"]?\s*(\d+)', re.IGNORECASE)

# Matches keys made of letters + two digits + one letter, e.g. ``kavokin98a``.
_KEY_RE = re.compile(r'([a-z]+)(\d{2})([a-z])$', re.IGNORECASE)


def _sort_key(key, fields):
    """Return the ``(name, year, letter)`` sort key of an entry, or ``None``.

    ``None`` is returned when ``fields`` has no numeric ``year`` field or when
    ``key`` (ignoring surrounding whitespace) is not of the form
    letters + two digits + one letter.
    """
    year_match = _YEAR_RE.search(fields)
    key_match = _KEY_RE.match(key.strip())
    if year_match is None or key_match is None:
        return None
    # The two digits in the key are deliberately not cross-checked against the
    # year field: ordering relies on the full year only.
    name, _two_digits, letter = key_match.groups()
    return name.lower(), int(year_match.group(1)), letter.lower()


def main():
    """Sort the BibTeX file named on the command line and print the result.

    The file is read as UTF-8. ``@string`` definitions are printed first, in
    their original order, followed by the valid entries sorted by
    (lower-cased name, full year from the ``year`` field, lower-cased letter),
    each followed by an empty line.

    An entry is valid when it has a numeric ``year`` field and a key of the
    form name + two digits + one letter. Invalid entries are NOT written to
    stdout; their count and keys are reported on stderr together with the
    total and valid counts.

    Exits with status 1 (after printing a usage line on stderr) unless exactly
    one command-line argument is given.
    """
    if len(sys.argv) != 2:
        print("Usage: python orderbib.py <bibtex_file>", file=sys.stderr)
        sys.exit(1)

    # Explicit encoding so accented author names do not depend on the locale.
    with open(sys.argv[1], 'r', encoding='utf-8') as f:
        content = f.read()

    strings = _STRING_RE.findall(content)
    entries = _ENTRY_RE.findall(content)

    valid = []
    invalid_keys = []
    for full_entry, key, fields in entries:
        sort_key = _sort_key(key, fields)
        if sort_key is None:
            invalid_keys.append(key)
        else:
            valid.append((sort_key, full_entry.strip()))

    # Sort on the key only; the sort is stable, so entries with identical
    # keys keep their original relative order.
    valid.sort(key=lambda item: item[0])

    # Statistics go to stderr so that stdout can be redirected to a .bib file.
    print(f"Total entries: {len(entries)}", file=sys.stderr)
    print(f"Valid entries: {len(valid)}", file=sys.stderr)
    print(f"Invalid entries: {len(invalid_keys)}", file=sys.stderr)
    if invalid_keys:
        print("Invalid keys:", file=sys.stderr)
        for bad_key in invalid_keys:
            print(bad_key, file=sys.stderr)

    # @string definitions, one per line, then a single empty line separating
    # them from the regular entries.
    if strings:
        print('\n'.join(s.strip() for s in strings))
        print()

    for _, entry_text in valid:
        print(entry_text)
        print()  # Empty line between entries
# Run the sorter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()