Program to classify Telegram channels and groups. The program ranks channels and groups by number of subscribers and number of messages.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

783 lines
47 KiB

#coding=utf-8
# https://try.gitea.io/egonalbrecht/mass_ban_telegram or https://git.teknik.io/egonalbrecht/mass_ban_telegram
# Author: Telegram -> @chavespatriota, Chat ID = 1670082376, https://t.me/s/canaisfascistasbr, 16-08-2020.
#See the video tutorial on the website above to learn how to use the program.
#Program made in Linux. Edit if necessary.
#Do not use the name 'telethon' as the program file name. Use Python 3. Install all packages that are requested, using 'pip3 install'. To run the program on Linux, open the terminal in the folder where the program is and run the command 'python3 program.py'.
#Starting point: https://medium.com/game-of-data/telegram-channel-data-extraction-users-information-chats-and-specific-messages-and-data-21bb54710fd3
#
#Your telephone number will be requested. Enter your phone number with the country code (US is +1).
#Enter the password that is requested. If you don't have a password on Telegram, go to the app and create one.
#It is not necessary to be subscribed to a channel to classify it.
#Importing required libraries
from telethon import TelegramClient, sync
import io #Necessary for Windows.
from telethon.tl.functions.channels import GetFullChannelRequest
from telethon import errors
from telethon.tl.types import InputChannel #Allows you to access channels, even without being subscribed to it
#Access channels without being subscribed to them, using the 'access_hash' (an ID for an entity pair, user-channel in this case). You need to join the channel at least once to get the 'access_hash' between you and it.
from telethon.tl.functions.channels import JoinChannelRequest #https://t.me/TelethonChat/286309
from telethon.tl.functions.channels import LeaveChannelRequest
import time
from time import sleep
import re #https://stackoverflow.com/a/4998688, https://stackoverflow.com/questions/4998629/split-string-with-multiple-delimiters-in-python
from telethon.tl.functions.messages import CheckChatInviteRequest, ImportChatInviteRequest #https://t.me/TelethonChat/249666, https://t.me/TelethonChat/388378
import os.path #https://www.guru99.com/python-check-if-file-exists.html
from os import path
import datetime
from datetime import date #https://www.guru99.com/date-time-and-datetime-classes-in-python.html
from telethon import functions, types #https://tl.telethon.dev/methods/channels/set_discussion_group.html
import os
#Pacing of the requests sent to Telegram. Telegram imposes waiting times and each kind of request tolerates a different pace.
# wait_time  -> pause (seconds) before each data request
# time_add   -> seconds added to 'wait_time' after a flood error. Increase it if you get suspended for abuse.
# time_add_2 -> extra seconds added to 'wait_time' before sending messages, which need a longer pause
wait_time, time_add, time_add_2 = 10, 10, 15
#Telegram limits the number of links in a single post. This is the number of channels per part per category. The maximum seems to be 48.
num_channels_per_part = 45
#Sound alarm: duration in seconds and frequency in Hz. You need to install the 'sox' program on Linux https://stackoverflow.com/questions/16573051/sound-alarm-when-code-finishes
duration, freq = 1, 500
#Read personal data (api_id, api_hash and session_name) from the settings file.
#Useful lines have the form "name = value"; lines starting with '#' are comments.
api_id = None
api_hash = None
session_name = None
#'with' guarantees the file is closed even when a line cannot be parsed.
with io.open("Files and Settings/api_id and api_hash.txt", 'r', encoding="utf-8") as file3_in:
    id_hash_temp = [x.strip() for x in file3_in] #It removes the '\n' from all entries in the list.
for i_id_hash_temp in id_hash_temp:
    #Skip blank lines (indexing [0] on an empty string raises IndexError) and comment lines.
    if not i_id_hash_temp or i_id_hash_temp[0] == '#':
        continue
    #Spaces and single quotes are removed so "api_id = '123'" and "api_id=123" are read the same way.
    i_id_hash_temp = i_id_hash_temp.replace(" ", "").replace("'", "")
    if i_id_hash_temp[0:7] == 'api_id=':
        api_id = int(i_id_hash_temp[7:])
    if i_id_hash_temp[0:9] == 'api_hash=':
        api_hash = str(i_id_hash_temp[9:])
    if i_id_hash_temp[0:13] == 'session_name=':
        session_name = str(i_id_hash_temp[13:])
#Fail with a readable message instead of a NameError when a setting is missing.
if api_id is None or api_hash is None or session_name is None:
    print('Missing api_id, api_hash or session_name in "Files and Settings/api_id and api_hash.txt". Closing...')
    quit()
print('Your personal data is:')
print(' api_id =', api_id)
print(' api_hash =', api_hash)
print(' session_name = ', session_name)
print('------------------------------------')
client = TelegramClient(session_name, api_id, api_hash).start() #Login in Telegram
#Entities where the classification can be posted, one per line of the external file.
#New-style private links ('/+') do not work well in the program yet, so they are rewritten to the '/joinchat/' form.
with io.open('Files and Settings/Receiving Entities.txt') as f_inp:
    entities_receive = [raw_entry.replace('/+', '/joinchat/').strip() for raw_entry in f_inp]
#Ask the user which of the entities read from the file will receive the classification.
print('Do you want to post the classification in which entities? Enter the number of each entity and press ENTER.')
num_ent = [*range(len(entities_receive))] #All valid entity numbers
for i, ii in enumerate(entities_receive): #Show options
    print(' ' + str(i) + ": " + ii)
print('------------------------------------')
receiving_entities = []
more = 'y'
list_yes = ['y', 'Y', 'Yes', 'yes']
list_no = ['n', 'N', 'No', 'no']
while more in list_yes: #Select options
    try:
        receiving_entities.append(int(input()))
    except ValueError: #Something that is not a number was typed; close cleanly instead of showing a traceback
        print('Invalid entity number. Closing...')
        quit()
    print('Some more? (y/n)')
    more = input().strip()
    if more in list_yes:
        print('Ok. Which one?') #Only ask for another number when the user really wants one more
    elif more not in list_no:
        print('I did not understand. Closing...')
        quit()
for i in receiving_entities: #A typed number that does not correspond to a listed entity
    if i not in num_ent:
        print('Invalid entity number. Closing...')
        quit()
#For every chosen entity keep its link (public or private) and the prefix used to build links to its messages.
#Each entry of 'entities_receive' has the form "<link> ID: <id or username>".
entities_receive_invite = []
entitie_receive_prefix_link_message = []
for input1 in receiving_entities:
    entry_fields = entities_receive[input1].split(' ID: ')
    chosen_link = entry_fields[0].strip()
    entities_receive_invite.append(chosen_link)
    chosen_id = entry_fields[1].strip()
    #A numeric ID means a private channel, whose message links are 'c/' plus the channel ID. https://www.pythonpool.com/python-check-if-string-is-integer/
    #Otherwise the channel is public and message links start with the channel link itself.
    message_prefix = ('c/' + chosen_id) if chosen_id.isnumeric() else chosen_link
    entitie_receive_prefix_link_message.append(message_prefix)
    print('Printing classification on the entity -> ' + str(chosen_link))
print('------------------------------------')
print(' ')
print('WARNING: Avoid problematic channels in the classification')
print(' ')
#Read list of channels for classification.
#Result: 'id_channels', a 2D list with one row per channel:
# [category (int), ID+access_hash or username/invite (str), public/private link or 'new' (str), messages (str), subscribers (str), specific category (str, may be '')]
id_channels = [] #Variable that stores the data of the channels of the text file.
i_category = 0 #Quantity of categories. Each category is identified by an integer number.
i_channels_per_cat = 0 #Cumulative variable used to count the number of channels per category.
num_channels_per_cat = [] #List with the number of channels in each category.
not_repeat = [] #Identifiers already seen, used to detect repeated channels.
cat_separator = '--------------------------------------------------- end of category ---------------------------------------------------\n'
id_channel_line = 'null'
msg_subscribers_type = []
date_last = ''
strdate = ''
today = date.today()
#The file is organised by category and every category must end with a line equal to 'cat_separator'.
#UTF-8 is requested explicitly, like for the other settings files, because the list holds emoji and channel titles that the platform default encoding (e.g. on Windows) may not decode.
with io.open('Files and Settings/Entities to Classify.txt', 'r', encoding="utf-8") as file_in:
    for line in file_in:
        if len(line.strip()) == 0: #Empty line, skip
            continue
        if line[0] == '#': #Comment line, skip
            continue
        if line[0] == '*': #Comment line, skip
            continue
        if '/+' in line: #New private link not works well in program yet
            line = line.replace('/+','/joinchat/')
        if line[0:6] == 'Date: ': #Read date of the last classification, written as day-month-year
            strdate = line[6:].strip().split('-')
            date_last = datetime.date(int(strdate[2]), int(strdate[1]), int(strdate[0])) #With a date object Python can do arithmetic calculations over time
            continue
        #Compare without surrounding whitespace so a separator on the last line of the file (no trailing '\n') is still recognised.
        if line.strip() == cat_separator.strip(): #End of category
            i_category = i_category + 1
            num_channels_per_cat.append(i_channels_per_cat) #The number of channels of the category just closed.
            i_channels_per_cat = 0 #Counter variable reset.
            continue
        if line[0] == '@': #New channel added by its username with '@'
            line = line[1:] #Removes the '@', because it cannot be used to access the channel. With prefixes 'https://t.me/' and 't.me' no problem.
        line_break = line.split(' -+- ') #' -+- ' is the separator chosen to separate the data entries of a line.
        if (len(line_break) > 1) and (line_break[0][0] == '-'): #Already classified: identifier is "ID access_hash", without specific category
            id_channel_line = line_break[0]
        else:
            id_channel_line = line_break[0].split(' ')[0] #A new channel may carry a specific category (an emoji 💎🧬🎵📱) after the link; keep only the link
        #Conditional to avoid repeated channels. WARNING: Does not work when a channel is classified for the first time
        if id_channel_line in not_repeat:
            print()
            print("Channel " + id_channel_line.split(' ')[0].strip() + " is repeated!") #Select only ID number, not access_hash.
            print()
            continue
        #Channel is not repeated, so adds it
        not_repeat.append(id_channel_line) #For comparison with the other channels.
        i_channels_per_cat += 1
        msg_subscribers_type = []
        type_channel = []
        if len(line_break) == 3: #Already classified
            public_or_private = line_break[2].split(' ')[0] #It is private if the string has "/joinchat/", i. e., it is easy to identify.
            msg_subscribers_type = line_break[2].strip().split(' ') #Username, messages, number of subscribers and specific category, if the channel has one (memes, paganism, genetics ...)
            msg_subscribers_type.pop(0) #Removes the username and leaves only messages, subscribers and specific category
        else: #Not classified yet
            public_or_private = 'new' #String smaller than 3 cannot represent an entity.
            msg_subscribers_type.append('new') #Doesn't have a message number
            msg_subscribers_type.append('new') #Doesn't have a subscribers number
            if len(line_break[0].strip().split(' ')) == 2: #Not classified but it has a specific category
                msg_subscribers_type.append(line_break[0].strip().split(' ')[1]) #Append type
        id_cat_channel = [
            i_category, #1st information is category (integer).
            id_channel_line, #2nd information is ID+access_hash, username or invite (string).
            public_or_private, #3rd information is public or private (string).
            msg_subscribers_type[0], #4th information is messages number.
            msg_subscribers_type[1], #5th information is subscribers number.
            msg_subscribers_type[2] if len(msg_subscribers_type) == 3 else '', #6th information is specific category (string).
        ]
        id_channels.append(id_cat_channel) #For example, in case of 350 channels, the list will have 350x6 dimension.
#A last category without its closing separator line would otherwise never be counted, leaving its channels
#with a category number that has no entry in 'num_channels_per_cat'. Close it implicitly.
if i_channels_per_cat > 0:
    i_category = i_category + 1
    num_channels_per_cat.append(i_channels_per_cat)
    i_channels_per_cat = 0
#If you want to see content of 'id_channels' uncomment this.
#for i in id_channels:
# print(i)
#Get Telegram data about channels and update list.
#For each row of 'id_channels' this loop requests the channel's full info and its message count, appends one row
#to 'channels_data' and appends one text line to 'list_out' (the refreshed list for future classifications).
#Channels that cannot be read are reported (ERROR01..ERROR07), removed from the per-category count and skipped.
#NOTE(review): indentation is not visible in this copy of the file, so the nesting described in the comments below
#is inferred from the order of the statements - confirm against the original file.
channels_data = [] #A list of channels where each entry contains: [subscribers, channel name, number of posts, username or invite, category, posts since last classification, subscribers since last classification, specific category]. By 'username' you can know if it is public, private or does not exist.
id_cat_channel = []
id_cat_channel_2 = []
i_cat = 0 #Category of the channel being processed; advanced by one every time a row with a different category number appears.
list_out = '' #Output for all channels for future classifications and statistic
username_channel = ''
for id_cat_channel in id_channels: #One iteration for each channel
new_channel = []
channel_out = '' #Output for each channel
num_msg = id_cat_channel[3] #6 informations: category [0], username or invite link [1], public or private [2], messages [3], subscribers [4] and specific category [5]
num_sub = id_cat_channel[4]
num_type = id_cat_channel[5]
#Obtain number of subscribers from any public channel. Source: https://github.com/LonamiWebs/Telethon/issues/597
#WARNING: Some channels can have a problem with the ID and are not recognized, as if the Chat ID corresponded to a channel that does not exist (ValueError: Could not find the input entity...). To get around this problem, the username is used instead of the Chat ID. Several channels that had this problem stopped failing after some weeks; the cause is unknown. The identifier read from the file is a string, so a test is done to see if id_cat_channel[1] holds a number (Chat ID) or a username. Modify the channels list file, if necessary.
#Just convert to integer
if (id_cat_channel[1][0] == '-'): #[0] selects the first character of id_cat_channel[1]. If it is '-', the channel is identified by ID and access_hash numbers.
#Break the string with ID and access_hash in two
#NOTE(review): as written, each replace() below swaps a single space for a single space (a no-op); presumably the first argument was meant to be a run of several spaces - confirm.
id_cat_channel_2 = id_cat_channel[1].strip().replace(' ', ' ') #Remove line break and extra blank space
id_cat_channel_2 = id_cat_channel_2.replace(' ', ' ')
id_cat_channel_2 = id_cat_channel_2.replace(' ', ' ')
id_cat_channel_2 = id_cat_channel_2.split(' ') #Identifier broken in ID and access_hash separately
#Convert to integer
id_cat_channel_2[0] = int(id_cat_channel_2[0][4:]) #Cut string, remove prefix '-100'
id_cat_channel_2[1] = int(id_cat_channel_2[1])
else:
id_cat_channel_2 = id_cat_channel[1] #Not classified before. Just a string. id_cat_channel[0] is the category.
#Category separator
if (id_cat_channel[0] != i_cat): #New category
channel_out += cat_separator
i_cat += 1 #Change category
time.sleep(wait_time) #Don't force Telegram
#Make a request to Telegram to pick up the data of a specific entity
#When id_cat_channel_2 is still a string, [0] is its first character, so this test is False for new channels.
if isinstance(id_cat_channel_2[0], int): #Channel already listed. Can be public or private. #https://note.nkmk.me/en/python-check-int-float/
try:
#WARNING: Some channels you can not access with username. Example: https://t.me/JornalDoGuerreiroGP. You need to get ID and access hash via client.get_entity('https://t.me/JornalDoGuerreiroGP')
channel_full_info = client(GetFullChannelRequest(InputChannel(channel_id=id_cat_channel_2[0], access_hash=id_cat_channel_2[1]))) #https://t.me/TelethonChat/39013
# if channel_full_info.chats[0].username == None and '/joinchat/' not in id_cat_channel[2]: #Bug with some public channels, like https://t.me/romulomaraschin. Check if channel is public but without
# username_channel = client.get_entity(int('-100' + str(id_cat_channel_2[0]))).username
# print(username_channel)
# if username_channel != None:
# channel_full_info.chats[0].username = id_cat_channel[2]
#Flood error: lengthen the pause for all following requests, wait what Telegram asks plus 10 seconds and retry once.
except errors.FloodWaitError as e:
print('FLOODING ERROR! Details: ' + str(e))
wait_time += time_add
print('Send message interval increased by ' + str(time_add) + ' seconds.')
print('Wait time now is ' + str(wait_time) + ' seconds')
time.sleep(e.seconds+10)
channel_full_info = client(GetFullChannelRequest(InputChannel(channel_id=id_cat_channel_2[0], access_hash=id_cat_channel_2[1])))
except errors.ChannelPrivateError:
print(' ')
print("ERROR01! Entity " + str(id_cat_channel_2[0]) + " inaccessible!")
print(' ')
num_channels_per_cat[id_cat_channel[0]] -= 1 #The channel will not be posted, so it no longer counts for its category
continue
except Exception as e:
print(' ')
print("ERROR02! Entity " + str(id_cat_channel_2[0]) + '. Details: ' + str(e))
print(' ')
num_channels_per_cat[id_cat_channel[0]] -= 1
continue
else:
pass
total_liq_posts = client.get_messages(id_cat_channel_2[0], limit=1).total #Gets number of publications from any public channel. It is already the total number of publications made minus those deleted.
#Test if private or public, because if private, tests if the invite link has expired, if public, get a more updated username.
if '/joinchat/' in id_cat_channel[2]:
try: #Test if the invitation is still valid
client(CheckChatInviteRequest(id_cat_channel[2].split('/joinchat/')[1])) #Use only the code of invite link. https://stackoverflow.com/a/12572391
except errors.FloodWaitError as e:
print('FLOODING ERROR! Details: ' + str(e))
wait_time += time_add
print('Send message interval increased by ' + str(time_add) + ' seconds.')
print('Wait time now is ' + str(wait_time) + ' seconds')
time.sleep(e.seconds+10)
#Second attempt after the wait. A second flood error skips the channel.
#NOTE(review): that second-flood path skips the channel without decrementing num_channels_per_cat, unlike the other skip paths - confirm this is intended.
try:
client(CheckChatInviteRequest(id_cat_channel[2].split('/joinchat/')[1]))
except errors.FloodWaitError as e:
print('FLOODING ERROR AGAIN!!! SKIP! Details: ' + str(e))
wait_time += time_add
print('Send message interval increased by ' + str(time_add) + ' seconds.')
print('Wait time now is ' + str(wait_time) + ' seconds')
time.sleep(e.seconds+10)
continue
except Exception as e:
print()
print("ERROR0333! Details: " + str(e))
print()
num_channels_per_cat[id_cat_channel[0]] -= 1
continue
else:
pass
except Exception as e:
print()
print("ERROR03! Invite link expired or other. Details: " + str(e))
print()
num_channels_per_cat[id_cat_channel[0]] -= 1
continue
else:
pass
username_channel = id_cat_channel[2] #Just pass the private channel invite
else:
username_channel = channel_full_info.chats[0].username #Public channel. Update username.
else: #New channel. Can be public or private.
id_cat_channel_2 = id_cat_channel_2.strip() #Just remove line break and blank space
try:
channel_full_info = client(GetFullChannelRequest(id_cat_channel_2)) #Get data. For some channels you can get data without being subscribed.
# if channel_full_info.chats[0].username == None: #Bug with some channels
# username_channel = client.get_entity(id_cat_channel_2).username
# if username_channel != None:
# channel_full_info.chats[0].username = id_cat_channel_2
except: #Not subscribed or there is a problem with the link.
try:
#Try to join the channel.
client(JoinChannelRequest(id_cat_channel_2))
except:
try: #Maybe the new channel is private.
client(ImportChatInviteRequest(id_cat_channel_2.split('/joinchat/')[1]))
except errors.FloodWaitError as e:
print('FLOODING ERROR! Details: ' + str(e))
wait_time += time_add
print('Send message interval increased by ' + str(time_add) + ' seconds.')
print('Wait time now is ' + str(wait_time) + ' seconds')
time.sleep(e.seconds+10)
client(ImportChatInviteRequest(id_cat_channel_2.split('/joinchat/')[1]))
except errors.ChannelPrivateError:
print(' ')
#NOTE(review): id_cat_channel_2 is a string in this branch, so [0] below is only its first character.
print("ERROR04! New entity " + str(id_cat_channel_2[0]) + " is inaccessible!")
print(' ')
num_channels_per_cat[id_cat_channel[0]] -= 1
continue
except Exception as e:
print(' ')
print("ERROR05! New entity " + str(id_cat_channel_2[0]) + ". Sometimes it gives unexplained problem with the SESSION file. Delete it and do login again. Details: " + str(e))
print(' ')
num_channels_per_cat[id_cat_channel[0]] -= 1
continue
else: #The problem was that the account was not subscribed; since it joined only to read the data, it has to leave afterwards
pass
#Get all data
channel_full_info = client(GetFullChannelRequest(id_cat_channel_2))
total_liq_posts = client.get_messages(id_cat_channel_2, limit=1).total
#You took everything you needed, now you can already go away
client(LeaveChannelRequest(id_cat_channel_2))
else:
pass
#Get all data
channel_full_info = client(GetFullChannelRequest(id_cat_channel_2))
total_liq_posts = client.get_messages(id_cat_channel_2, limit=1).total
#You took everything you needed, now you can already go away
client(LeaveChannelRequest(id_cat_channel_2))
else: #New channel to which you are already subscribed, or whose data you can get without being a subscriber
pass
total_liq_posts = client.get_messages(id_cat_channel_2, limit=1).total
id_cat_channel_2 = id_cat_channel_2.strip()
if '/joinchat/' in id_cat_channel_2: #Private entity
try:
client(CheckChatInviteRequest(id_cat_channel_2.split('/joinchat/')[1]))
except errors.FloodWaitError as e:
print('FLOODING ERROR! Details: ' + str(e))
wait_time += time_add
print('Send message interval increased by ' + str(time_add) + ' seconds.')
print('Wait time now is ' + str(wait_time) + ' seconds')
time.sleep(e.seconds+10)
client(CheckChatInviteRequest(id_cat_channel_2.split('/joinchat/')[1]))
except Exception as e:
print()
print("ERROR06! Invite link expired or wrong. Details: " + str(e))
print()
num_channels_per_cat[id_cat_channel[0]] -= 1
continue
else:
pass
username_channel = id_cat_channel_2
else: #Public entity
username_channel = channel_full_info.chats[0].username
#Turn 'id_cat_channel_2' into a list with ID and access_hash, like the channels already classified.
id_cat_channel_2 = []
id_cat_channel_2.append(int(channel_full_info.full_chat.id))
id_cat_channel_2.append(int(channel_full_info.chats[0].access_hash))
participants_channel = channel_full_info.full_chat.participants_count #Take the number of subscribers from the channel.
channel_name = channel_full_info.chats[0].title #Gets channel name. #https://stackoverflow.com/questions/46729921/how-to-get-channel-chat-user-name-of-forwarded-message-using-telethon
channel_specific_category = id_cat_channel[5]
#Create a channel data list (only one channel)
new_channel.append(participants_channel) #Today
new_channel.append(channel_name)
new_channel.append(total_liq_posts)
new_channel.append(username_channel)
new_channel.append(i_cat) #Adds the channel category to the specific channel data.
if num_msg != 'new':
new_channel.append(total_liq_posts - int(num_msg)) #Now minus last classification
new_channel.append(participants_channel - int(num_sub))
else:
new_channel.append(000) #Any flag: a channel seen for the first time has no previous numbers to compare with
new_channel.append(000)
new_channel.append(channel_specific_category)
#List updated
channel_out += '-100' + str(channel_full_info.full_chat.id)
access_hash = channel_full_info.chats[0].access_hash
#Formatting. Access hash size for printing.
#NOTE(review): as written, all four branches below append exactly the same text; presumably the string literals once carried different amounts of padding to align the column - confirm.
if int(access_hash) < 0:
if len(str(abs(access_hash))) < 19:
channel_out += ' ' + str(channel_full_info.chats[0].access_hash)
else:
channel_out += ' ' + str(channel_full_info.chats[0].access_hash)
else:
if len(str(abs(access_hash))) < 19:
channel_out += ' ' + str(channel_full_info.chats[0].access_hash)
else:
channel_out += ' ' + str(channel_full_info.chats[0].access_hash)
#Only channels with both a title and a username/invite are written to the list and kept for the classification.
if channel_name != None and username_channel != None:
channel_out += ' -+- ' + channel_name
channel_out += ' -+- ' + username_channel
channel_out += ' ' + str(total_liq_posts)
channel_out += ' ' + str(participants_channel)
if len(id_cat_channel[5].strip()) != 0: #Entity has a specific category
channel_out += ' ' + id_cat_channel[5].strip()
channel_out += '\n'
list_out += channel_out
print(channel_out.strip())
#Add the new channel in the list of channels
channels_data.append(new_channel)
else:
print()
print("ERROR07! Channel maybe not exist or you have been banned from it.")
print()
num_channels_per_cat[id_cat_channel[0]] -= 1
#Presumably executed once after the loop: closes the last category in the output list.
list_out += cat_separator
print(cat_separator)
#If you want to see the content of 'channels_data' uncomment below.
#for i in channels_data:
# print(i)
#Sound alarm: tells the user that data gathering has finished.
#os.system() does not raise when the command is missing or fails; it reports that through its return value,
#so the return value is what decides whether the help text is shown.
try:
    alarm_status = os.system('play -nq -t alsa synth {} sine {}'.format(duration, freq))
except Exception: #Kept as a safeguard; the alarm is optional and must never stop the program
    alarm_status = -1
if alarm_status != 0:
    print('You need to see the follow link to enable sound alarm in your Linux machine: https://stackoverflow.com/questions/16573051/sound-alarm-when-code-finishes')
#Last chance to give up before anything is posted. Only the answer '1' lets the program go on.
print(' ')
print("If you want to unlink group from channel, it is the time. Do you want to post the classification:")
print(" 1: Yes")
print(" 2: No")
input2 = str(input()).strip()
if input2 != '1':
    if input2 == '2': #Explicit "No" gets a confirmation message; any other answer closes silently
        print('Post canceled.')
    quit()
#Build the four rankings, each one a copy of 'channels_data' in decreasing order of one column.
#https://stackoverflow.com/questions/18563680/sorting-2d-list-python, https://stackoverflow.com/questions/8767779/python-sorted-function-not-working-the-way-it-should/8767786
def _ranked_by(column):
    #Return the channels sorted from the highest to the lowest value of the given column.
    return sorted(channels_data, key=lambda row: row[column], reverse=True)

channel_data_subscribes = _ranked_by(0) #Number of subscribers
channel_data_posts = _ranked_by(2) #Number of posts
channel_data_activity = _ranked_by(5) #Posts since the last classification
channel_data_growth = _ranked_by(6) #Subscribers since the last classification
#Read text used in classification. You need to select a language.
#Every file inside 'Files and Settings/Language' is offered as an option, identified by its position in the list.
print("In which language do you want to print the classification?")
from os import walk #https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory
filenames = next(walk('Files and Settings/Language'), (None, None, []))[2] #Empty list when the folder does not exist
for i, ii in enumerate(filenames): #Show options
    print(' ' + str(i) + ": " + ii.split('.txt')[0]) #Remove the extension in file name
print('------------------------------------')
lan_choice = input()
#Validate the answer: text that is not a number, or a number outside the list, closes the program with a message
#instead of a traceback. A negative number is rejected too, since it would silently pick a file counting from the end.
try:
    lan_index = int(lan_choice)
except ValueError:
    lan_index = -1
if not 0 <= lan_index < len(filenames):
    print('Invalid language number. Closing...')
    quit()
lan_file = filenames[lan_index]
lan_file_name = lan_file.split('.txt')[0]
print('Printing classification in follow language -> ' + lan_file_name)
#Read content of the language file.
#Lines between the two marker lines are the category names; lines after the second marker are the texts
#of the post, identified by their position.
cat = 0 #0: before the categories, 1: inside the categories, 2: after the categories
category = []
body = []
#'with' guarantees the file is closed after reading (it was left open before).
with io.open('Files and Settings/Language/' + lan_file, 'r', encoding="utf-8") as f_lan_in:
    for line in f_lan_in:
        if line[0] == '#': #Comment line, skip
            continue
        line_text = line.strip()
        if line_text == '------- Put the categories below ---------':
            cat = 1
            continue
        if line_text == '------------ End categories -------------':
            cat = 2
            continue
        if cat == 1:
            category.append(line_text)
        if cat == 2:
            body.append(line_text)
#17 texts are read by position below; explain the problem instead of failing with a bare IndexError.
if len(body) < 17:
    print('Language file "' + lan_file + '" is incomplete: 17 text lines are expected after the categories. Closing...')
    quit()
footnote_txt = body[0] + '\n' + body[1]
title_txt = body[2]
Category_txt = body[3]
of_txt = body[4]
Part_txt = body[5]
Classification_txt = body[6]
by_txt = body[7]
messages_in_the_last_txt = body[8]
days_txt = body[9]
subscribers_in_the_last_txt = body[10]
link_emoji_txt = body[11]
Emoji_Index_txt = body[12]
subscribers_txt = body[13]
activity_txt = body[14]
messages_txt = body[15]
growth_txt = body[16]
empty_space = ' '
now = datetime.datetime.now()
#Date in the footnote: day-month-year for 'pt-br', month-day-year for any other language.
date_now = now.strftime("%d-%m-%Y") if lan_file_name == 'pt-br' else now.strftime("%m-%d-%Y")
footnote_txt += ' ' + date_now
#Take a sticker from Telegram's sticker list to use as a separator. A good separation sticker package: https://t.me/addstickers/SeparatoriDDB. https://docs.telethon.dev/en/latest/examples/working-with-messages.html#sending-stickers
from telethon.tl.functions.messages import GetAllStickersRequest
from telethon.tl.functions.messages import GetStickerSetRequest
from telethon.tl.types import InputStickerSetID
import os #https://stackoverflow.com/a/2507819/5175660
#The ID and access hash of the sticker package are cached in this file, one number per line.
sticker_cache_file = "Files and Settings/ID and Access Hash of separator sticker (do not edit).txt"
#A missing cache file is treated like an empty one instead of crashing in getsize().
file_size = os.path.getsize(sticker_cache_file) if os.path.exists(sticker_cache_file) else 0
if file_size == 0: #Take ID and access_hash of sticker separator package saved in "Saved Messages".
    msg = client.get_messages('me') #'me' is the "Saved Messages" address. If no ID is specified, this command takes the last message posted.
    sticker_id = msg[0].media.document.attributes[1].stickerset.id
    sticker_access_hash = msg[0].media.document.attributes[1].stickerset.access_hash
    #'with' closes (and therefore flushes) the cache file as soon as both numbers are written.
    with io.open(sticker_cache_file, 'w', encoding="utf-8") as f_sticker_id:
        print(sticker_id, file=f_sticker_id)
        print(sticker_access_hash, file=f_sticker_id)
else: #Separation sticker package ID and access hash is already known
    with io.open(sticker_cache_file, 'r', encoding="utf-8") as f_sticker_id:
        sticker_transfer = f_sticker_id.readlines()
    sticker_id = int(sticker_transfer[0])
    sticker_access_hash = int(sticker_transfer[1])
stickers = client(GetStickerSetRequest(
    stickerset=InputStickerSetID(
        id=sticker_id, access_hash=sticker_access_hash
    )
))
num_sticker1 = 17 #Sticker number chosen from the sticker group.
num_sticker2 = 5
for iii in entities_receive_invite: #Sends a specific sticker to every receiving entity.
    client.send_file(iii, stickers.documents[num_sticker2])
#Create the list of channels by number of subscribers and posts:
#work out how many characters the number column needs for each criterion, so the numbers line up.
def _widest_number(rows, column):
    #Width of the widest value of 'column' among 'rows'. Every row is checked because the first row of a
    #decreasing ranking is not always the widest (a negative activity or growth such as -120 is wider than 5).
    #With no rows the width is 1, instead of an IndexError.
    return max((len(str(row[column])) for row in rows), default=1)

#Source: https://stackoverflow.com/questions/2189800/length-of-an-integer-in-python
dig_max = [
    _widest_number(channel_data_subscribes, 0), #[0] subscribers
    _widest_number(channel_data_posts, 2), #[1] posts
    _widest_number(channel_data_activity, 5), #[2] activity
    _widest_number(channel_data_growth, 6), #[3] growth
]
#The formats below are NOT in the order of 'dig_max' (1 and 2 are swapped on purpose): they follow the order
#of the classification criteria - subscribers, activity, posts, growth.
criterion_widths = [dig_max[0], dig_max[2], dig_max[1], dig_max[3]]
#Fixed size integer: https://stackoverflow.com/questions/45731862/correct-way-to-format-integers-with-fixed-length-and-space-padding
fixed_prefix = ['{:' + str(width) + 'd}' for width in criterion_widths]
fixed_prefix_str = ['{:>' + str(width) + '}' for width in criterion_widths] #For string case.
wait_time += time_add_2 #Sending messages needs a longer wait time than reading data
#result = client(functions.channels.SetDiscussionGroupRequest( #https://tl.telethon.dev/methods/channels/set_discussion_group.html#examples
# broadcast='dadasdsaddsad',
# group='sadasdsadasdasdasd'
#))
#print(result)
#First classification: the entities file has no date yet, so 'date_last' is still the empty string.
#Using today makes the lapse '0', which is what disables the 'activity' and 'growth' criteria.
if not str(date_last):
    date_last = today
lapse_time = str((today - date_last).days).strip()
#The four criteria, always in this order: subscribers, activity, messages, growth.
criterion_of_classification = [subscribers_txt, activity_txt, messages_txt, growth_txt]
#Only activity and growth mention the number of days since the last classification.
period_note = ' (' + lapse_time + ' ' + days_txt + ')'
time_in_criterion = ['', period_note, '', period_note]
channel_data_ordered = [channel_data_subscribes, channel_data_activity, channel_data_posts, channel_data_growth]
index_criterion = [0, 5, 2, 6] #Column of a channel row holding the value of each criterion
#One placeholder per criterion. The name 'list' hides the builtin, but it is kept because the code that follows uses it.
list = ['void'] * 4
fixed = ['void'] * 4
num_part_per_cat = [] #Counts the number of parts per category. The result is independent of the criterion.
# Publish the rankings. For every classification criterion (0 subscribers,
# 1 activity, 2 posts, 3 growth) and every category, the channels of that
# category are split into parts of at most `num_channels_per_part` rows. Each
# part is sent as one HTML message to every entity in `entities_receive_invite`,
# and a separator sticker is sent after each published criterion.
#
# Channel row layout (i_l_i): [0] subscribers, [1] channel name, [2] posts,
# [3] username or invite link, [4] category number, [5] activity, [6] growth,
# [7] specific category.
#
# Each category sends exactly max(1, ceil(channels / num_channels_per_part))
# messages per criterion. The message-ID arithmetic and the navigation palette
# further down depend on that count.


def _send_with_flood_retry(pause, send, *args, **kwargs):
    """Call send(*args, **kwargs), retrying once after a Telegram flood wait.

    On errors.FloodWaitError the pause between messages is increased by
    `time_add`, the script sleeps for the time Telegram asked for plus 10
    seconds, and the call is repeated once. Returns the pause to use from now
    on (unchanged when no flood error happened).
    """
    try:
        send(*args, **kwargs)
    except errors.FloodWaitError as e:
        print('FLOODING ERROR! Details: ' + str(e))
        pause += time_add
        print('Send message interval increased by ' + str(time_add) + ' seconds.')
        print('Wait time now is ' + str(pause) + ' seconds')
        time.sleep(e.seconds+10)
        send(*args, **kwargs)
    return pause


def _format_channel_row(i_l_i, i_criterion):
    """Return one HTML line: the padded criterion value, a separator, the link."""
    ic = i_l_i[index_criterion[i_criterion]]
    if ic == 0:
        # A zero value is shown as '-' (the original note relates this to new channels).
        number = fixed_prefix_str[i_criterion].format('-')
    else:
        number = fixed_prefix[i_criterion].format(ic)
    if len(i_l_i[1]) > 28:
        # Shorten long names. The row is updated in place, as the script always did.
        i_l_i[1] = i_l_i[1][:20] + '...'
    spec_cri = i_l_i[7].strip()
    if spec_cri:
        spec_cri = '(' + spec_cri + ') '
    link = str(i_l_i[3])
    if '/joinchat/' not in link:
        # Anything that is not an invite link is treated as a username.
        link = 'https://t.me/' + link
    # NOTE(review): the name and specific category go into HTML unescaped;
    # a '<' or '&' in them may break Telegram's HTML parsing. Confirm whether
    # they are sanitised upstream before adding html.escape() here.
    return '<code>' + number + '</code> | ' + spec_cri + '<a href="' + link + '">' + str(i_l_i[1]) + '</a>\n'


for i_criterion in range(4):
    if lapse_time == '0' and i_criterion in (1, 3): #First classification. Don't print 'activity' and 'growth' criteria.
        continue
    for i in range(i_category): #The last value assumed by i_category already has the total number of categories.
        # Ceiling division. An empty category still sends one (empty) message,
        # so it counts as one part to keep the palette links aligned.
        num_parts = max(1, int(-(-num_channels_per_cat[i] // num_channels_per_part)))
        # Appended once per published criterion, as before; the palette only
        # reads the first `i_category` entries.
        num_part_per_cat.append(num_parts)
        rows_in_category = [i_l_i for i_l_i in channel_data_ordered[i_criterion] if i_l_i[4] == i]
        part_starts = [start for start in range(0, len(rows_in_category), num_channels_per_part)]
        if not part_starts:
            part_starts = [0]
        for i_part, start in enumerate(part_starts, 1):
            message = title_txt + '\n<b>(' + Category_txt + ' ' + str(i+1) + ' ' + of_txt + ' ' + str(i_category) + ', ' + Part_txt + ' ' + str(i_part) + ' ' + of_txt + ' ' + str(num_parts) + ')</b>\n<code>'
            message += empty_space[:47] + '</code>\n<b>' + Classification_txt + ' ' + by_txt + ' ' + '<u>' + criterion_of_classification[i_criterion] + '</u></b>' + time_in_criterion[i_criterion] + ':\n' #https://www.tutorialspoint.com/how-to-underline-a-text-in-html
            message += category[i] + '\n'
            message += ''.join(
                _format_channel_row(i_l_i, i_criterion)
                for i_l_i in rows_in_category[start:start + num_channels_per_part]
            )
            message += '\n' + footnote_txt
            for iii in entities_receive_invite: #Several entities to send, so it sends for each one.
                time.sleep(wait_time)
                wait_time = _send_with_flood_retry(wait_time, client.send_message, iii, message, parse_mode="html", link_preview=False)
    # Separator sticker after every published criterion.
    for iii in entities_receive_invite:
        wait_time = _send_with_flood_retry(wait_time, client.send_file, iii, stickers.documents[num_sticker1])
# Number of criteria that were actually published.
if lapse_time == '0': #First classification
    print('First classification. Classification by activity and growth criteria will not be printed, only on the next classification, on another day.')
    num_cri = 2
else:
    num_cri = 4
#Creates a link post for each category and part of the classification.
# ID of the most recent message on each receiving entity, used as the
# reference from which the links to every part are derived.
id_last_post = [client.get_messages(iii, limit= 1)[0].id for iii in entities_receive_invite]
# Extra parts: a category with more than `num_channels_per_part` channels
# needs int((channels - 1) / num_channels_per_part) additional messages.
pats_add = sum(
    int((ii-1)/num_channels_per_part)
    for ii in num_channels_per_cat
    if ii > num_channels_per_part
)
# Phase difference. Each criterion accounts for `i_category` messages plus one
# sticker plus the extra parts, and one more message (the opening sticker) is
# subtracted on top of that.
_id_offset = 1 + num_cri*((i_category+1) + pats_add)
id_last_post = [last_id - _id_offset for last_id in id_last_post]
#Sound alarm
# os.system() does not raise when the command is missing or fails; it reports
# the failure through its return value. Check the exit status so the help
# message is actually shown when `play` cannot be run.
if os.system('play -nq -t alsa synth {} sine {}'.format(duration, freq)) != 0:
    print('You need to see the follow link to enable sound alarm in your Linux machine: https://stackoverflow.com/questions/16573051/sound-alarm-when-code-finishes')
import time
# Optional pause before the navigation palette is posted, so the operator can
# link a comments group to each channel by hand.
print('\n'.join([
    ' ',
    "Do you want a wait for link comments group?",
    " 1: Yes",
    " 2: No",
]))
input2 = str(input()).strip()
if input2 == '2':
    print('Ok, no waiting.')
elif input2 == '1':
    print('Ok, you have 15 seconds to link the group to the each channel.')
    time.sleep(15)
else:
    # Any other answer falls back to a longer pause.
    print('I do not understand. I will wait 60 seconds to post classifications.')
    time.sleep(60)
#Palette to navigate between classifications.
# The palette uses longer labels for the 'activity' and 'growth' criteria.
time_in_criterion[1] = ' (' + messages_in_the_last_txt + ' ' + lapse_time + ' ' + days_txt + ')'
time_in_criterion[3] = ' (' + subscribers_in_the_last_txt + ' ' + lapse_time + ' ' + days_txt + ')'
# Criteria that were published; on a first classification 1 and 3 are skipped.
shown_criteria = [c for c in range(4) if not (lapse_time == '0' and c in (1, 3))]
palette_content = [] #Each channel have a different palette.
for iii in range(len(id_last_post)):
    post_id = id_last_post[iii]
    pieces = [title_txt + '\n']
    for i_criterion in shown_criteria:
        post_id += 1  # One message is skipped at the start of every criterion.
        pieces.append('\n<b>' + Classification_txt + ' ' + by_txt + ' ' + '<u>' + criterion_of_classification[i_criterion] + '</u></b>' + time_in_criterion[i_criterion] + ':\n')
        for i in range(i_category):
            pieces.append(category[i] + '\n')
            for ii in range(1, num_part_per_cat[i]+1):
                post_id += 1  # Every part is one message, so its ID is the next one.
                pieces.append('<a href="https://t.me/' + entitie_receive_prefix_link_message[iii] +'/' + str(post_id) + '">' + '[' + Part_txt + ' ' + str(ii) + ']</a> ')
            pieces.append('\n')
    pieces.append('\n' + '<a href="' + link_emoji_txt + '">' + Emoji_Index_txt + '</a>\n' + footnote_txt)
    id_last_post[iii] = post_id
    palette_content.append(''.join(pieces))
# Post each entity's palette.
for iii, palette in enumerate(palette_content):
    client.send_message(entities_receive_invite[iii], palette, parse_mode = "html", link_preview=False)
#Writes the data into external files to make future classifications and statistics.
# The files are written directly through `with` blocks instead of redirecting
# sys.stdout, so every handle is closed (and flushed) before the file is used
# again, and the console output is never touched. UTF-8 is used explicitly,
# matching how the script reads its settings files.
import sys
print(' ')
print("Do you want to update the last classification (caution, this operation overwrites the classification entities list):")
print(" 1: Yes")
print(" 2: No")
input2 = str(input()).strip()
if input2 == '1':
    print('Last classification updated.')
    name_file_stat = 'Files and Settings/Statistics/' + date_now + '.txt'
    with io.open(name_file_stat, 'w', encoding="utf-8") as f_stat:
        f_stat.write(list_out)
    # Keep the previous entities list as a backup; os.replace overwrites an
    # existing backup on every platform.
    os.replace('Files and Settings/Entities to Classify.txt', 'Files and Settings/Entities to Classify (backup).txt')
    with io.open('Files and Settings/Entities to Classify.txt', 'w', encoding="utf-8") as f_entities:
        f_entities.write('Date: ' + str(today.day) + '-' + str(today.month) + '-' + str(today.year) + '\n' + list_out)
    print(' ')
    print("Do you want to send a copy of the updated classification to a channel?")
    print(" 1: Yes")
    print(" 2: No")
    input3 = str(input()).strip()
    if input3 == '1':
        print("Which channel?")
        for i, ii in enumerate(entities_receive): #Show options
            num_ent.append(i)
            print(' ' + str(i) + ": " + ii)
        print('------------------------------------')
        input4 = input()
        # Accept only a number that matches one of the listed options.
        try:
            chosen = int(input4)
        except ValueError:
            chosen = -1
        if 0 <= chosen < len(entities_receive):
            # The text before ' ID: ' of the chosen entry is used as the target.
            client.send_file(entities_receive[chosen].split(' ID: ')[0].strip(), file=name_file_stat)
        else:
            print('WRONG INPUT!')
elif input2 == '2':
    print('No update in last classification.')
else:
    print('WRONG INPUT!')
    sys.exit()
#Comment: The command 'channel_name = client.get_entity(id_cat_channel).title' can fail with "telethon.errors.rpcerrorlist.FloodWaitError: A wait of 48039 seconds is required (caused by ResolveUsernameRequest)". Solution: https://github.com/LonamiWebs/Telethon/issues/494. The problem was solved by replacing client.get_entity() with get_input_entity(), which also has the channel name. Adding a time interval between consecutive uses of client.get_entity() also avoids the blocking, but this is very problematic. To learn more about the problem, see: https://core.telegram.org/bots/faq#my-bot-is-hitting-limits-how-do-i-avoid-this