A simple emoji picker for rofi with multi-selection
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

extract_emojis.py 3.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. from collections import namedtuple
  2. from typing import List, Set, Dict
  3. import requests
  4. from bs4 import BeautifulSoup
  5. from lxml import etree
  6. from lxml.etree import XPath
  7. Emoji = namedtuple('Emoji', 'char name')
  8. def fetch_emoji_list() -> List[Emoji]:
  9. return extract_from_html(fetch_emoji_html())
  10. def fetch_emoji_html() -> BeautifulSoup:
  11. max_tries = 5
  12. for i in range(max_tries):
  13. print('Downloading emojis... try %s' % (i + 1))
  14. data = requests.get(
  15. 'https://unicode.org/emoji/charts-12.0/full-emoji-list.html',
  16. timeout=120
  17. ) # type: requests.Response
  18. if data:
  19. break
  20. if not data:
  21. print('Could not fetch emoji data. Try again later or use another URL.')
  22. exit(10)
  23. return BeautifulSoup(data.content, 'lxml')
  24. def extract_from_html(html: BeautifulSoup) -> List[Emoji]:
  25. emojis = []
  26. for row in html.find('table').find_all('tr'):
  27. if row.th:
  28. continue
  29. emoji = row.find('td', {'class': 'chars'}).string
  30. description = row.find('td', {'class': 'name'}).string.replace('⊛ ', '')
  31. emojis.append(Emoji(emoji, description))
  32. return emojis
  33. def fetch_human_emojis() -> List[chr]:
  34. print('Downloading list of human emojis...')
  35. data = requests.get(
  36. 'https://unicode.org/Public/emoji/12.0/emoji-data.txt',
  37. timeout=60
  38. ) # type: requests.Response
  39. started = False
  40. emojis = []
  41. for line in data.content.decode(data.encoding).split('\n'):
  42. if not started and line != '# All omitted code points have Emoji_Modifier_Base=No ':
  43. continue
  44. started = True
  45. if started and line == '# Total elements: 120':
  46. break
  47. if started and (line.startswith('#') or len(line) == 0):
  48. continue
  49. emojis.extend(extract_emojis_from_line(line))
  50. return emojis
  51. def extract_emojis_from_line(line: str) -> List[chr]:
  52. emoji_range = line.split(';')[0].strip()
  53. try:
  54. (start, end) = emoji_range.split('..')
  55. emojis = []
  56. for char in range(int(start, 16), int(end, 16) + 1):
  57. emojis.append(chr(char))
  58. return emojis
  59. except ValueError:
  60. return [chr(int(emoji_range, 16))]
  61. def fetch_annotations() -> Dict[chr, List[str]]:
  62. print('Downloading annotations')
  63. data = requests.get(
  64. 'https://raw.githubusercontent.com/unicode-org/cldr/release-35-1/common/annotations/en.xml',
  65. timeout=60
  66. ) # type: requests.Response
  67. xpath = XPath('./annotations/annotation[not(@type="tts")]')
  68. return {element.get('cp'): element.text.split(' | ') for element in
  69. xpath(etree.fromstring(data.content))}
  70. def write_file(all_emojis: List[Emoji], human_emojis: Set[chr], annotations: Dict[chr, List[str]]):
  71. print('Writing collected emojis to file')
  72. python_file = open('emojis.py', 'w')
  73. python_file.write('emoji_list="""')
  74. for entry in compile_entries(all_emojis, annotations):
  75. python_file.write(entry + "\n")
  76. python_file.write('"""\n\n')
  77. python_file.write('skin_tone_selectable_emojis={\'')
  78. python_file.write('\', \''.join(human_emojis))
  79. python_file.write('\'}\n')
  80. python_file.close()
  81. def compile_entries(emojis: List[Emoji], annotations: Dict[chr, List[str]]) -> List[str]:
  82. annotated_emojis = []
  83. for emoji in emojis:
  84. if emoji.char in annotations:
  85. entry = f"{emoji.char} {emoji.name} <small>({', '.join(annotations[emoji.char])})</small>"
  86. else:
  87. entry = f"{emoji.char} {emoji.name}"
  88. annotated_emojis.append(entry)
  89. return annotated_emojis
  90. if __name__ == "__main__":
  91. write_file(fetch_emoji_list(), fetch_human_emojis(), fetch_annotations())