'FinBert' object has no attribute 'class_weights' #69

Open

YuchenLuan22 opened this issue Nov 14, 2023 · 0 comments

[attached screenshot of the error]
from __future__ import absolute_import, division, print_function

import random

import pandas as pd
from torch.nn import MSELoss, CrossEntropyLoss
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
TensorDataset)
from tqdm import tqdm_notebook as tqdm
from tqdm import trange
from nltk.tokenize import sent_tokenize
from finbert.utils import *
import numpy as np
import logging
import os
import torch

from transformers.optimization import AdamW, get_linear_schedule_with_warmup
from transformers import AutoTokenizer

logger = logging.getLogger(__name__)

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

class Config(object):
"""The configuration class for training."""

def __init__(self,
             data_dir,
             bert_model,
             model_dir,
             max_seq_length=64,
             train_batch_size=32,
             eval_batch_size=32,
             learning_rate=5e-5,
             num_train_epochs=10.0,
             warm_up_proportion=0.1,
             no_cuda=False,
             do_lower_case=True,
             seed=42,
             local_rank=-1,
             gradient_accumulation_steps=1,
             fp16=False,
             output_mode='classification',
             discriminate=True,
             gradual_unfreeze=True,
             encoder_no=12,
             base_model='bert-base-uncased'):
    """
    Parameters
    ----------
    data_dir: str
        Path for the training and evaluation datasets.
    bert_model: BertModel
        The BERT model to be used. For example: BertForSequenceClassification.from_pretrained(...)
    model_dir: str
        The path where the resulting model will be saved.
    max_seq_length: int
        The maximum length of the sequence to be used. Default value is 64.
    train_batch_size: int
        The batch size for the training. Default value is 32.
    eval_batch_size: int
        The batch size for the evaluation. Default value is 32.
    learning_rate: float
        The learning rate. Default value is 5e-5.
    num_train_epochs: int
        Number of epochs to train. Default value is 10.
    warm_up_proportion: float
        The proportion of training steps during which the learning rate is linearly increased before
        it reaches the intended learning rate. Default value is 0.1.
    no_cuda: bool
        Determines whether to use gpu. Default is False.
    do_lower_case: bool
        Determines whether to make all training and evaluation examples lower case. Default is True.
    seed: int
        Random seed. Defaults to 42.
    local_rank: int
        Local rank for distributed training on GPUs. If set to -1, no distributed training is done.
        Default value is -1.
    gradient_accumulation_steps: int
        Number of gradient accumulations steps. Defaults to 1.
    fp16: bool
        Determines whether to use 16 bits for floats, instead of 32.
    output_mode: 'classification' or 'regression'
        Determines whether the task is classification or regression.
    discriminate: bool
        Determines whether to apply discriminative fine-tuning.
    gradual_unfreeze: bool
        Determines whether to gradually unfreeze lower and lower layers as the training goes on.
    encoder_no: int
        The layer from which the model will be fine-tuned. If set to 12, the whole model is fine-tuned.
        If set to, for example, 6, only the last 6 layers are fine-tuned.
    """
    self.data_dir = data_dir
    self.bert_model = bert_model
    self.model_dir = model_dir
    self.do_lower_case = do_lower_case
    self.max_seq_length = max_seq_length
    self.train_batch_size = train_batch_size
    self.local_rank = local_rank
    self.eval_batch_size = eval_batch_size
    self.learning_rate = learning_rate
    self.num_train_epochs = num_train_epochs
    self.warm_up_proportion = warm_up_proportion
    self.no_cuda = no_cuda
    self.seed = seed
    self.gradient_accumulation_steps = gradient_accumulation_steps
    self.output_mode = output_mode
    self.fp16 = fp16
    self.discriminate = discriminate
    self.gradual_unfreeze = gradual_unfreeze
    self.encoder_no = encoder_no
    self.base_model = base_model

class FinBert(object):
"""
The main class for FinBERT.
"""

def __init__(self,
             config):
    self.config = config

def prepare_model(self, label_list):
    """
    Sets some of the components of the model: Dataset processor, number of labels, usage of gpu and distributed
    training, gradient accumulation steps and tokenizer.
    Parameters
    ----------
    label_list: list
        The list of label values in the dataset. For example: ['positive','negative','neutral']
    """

    self.processors = {
        "finsent": FinSentProcessor
    }

    self.num_labels_task = {
        'finsent': 2
    }

    if self.config.local_rank == -1 or self.config.no_cuda:
        self.device = torch.device("cuda" if torch.cuda.is_available() and not self.config.no_cuda else "cpu")
        self.n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(self.config.local_rank)
        self.device = torch.device("cuda", self.config.local_rank)
        self.n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
        self.device, self.n_gpu, bool(self.config.local_rank != -1), self.config.fp16))

    if self.config.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
            self.config.gradient_accumulation_steps))

    self.config.train_batch_size = self.config.train_batch_size // self.config.gradient_accumulation_steps

    random.seed(self.config.seed)
    np.random.seed(self.config.seed)
    torch.manual_seed(self.config.seed)

    if self.n_gpu > 0:
        torch.cuda.manual_seed_all(self.config.seed)

    if os.path.exists(self.config.model_dir) and os.listdir(self.config.model_dir):
        raise ValueError("Output directory ({}) already exists and is not empty.".format(self.config.model_dir))
    if not os.path.exists(self.config.model_dir):
        os.makedirs(self.config.model_dir)

    self.processor = self.processors['finsent']()
    self.num_labels = len(label_list)
    self.label_list = label_list

    self.tokenizer = AutoTokenizer.from_pretrained(self.config.base_model, do_lower_case=self.config.do_lower_case)

def get_data(self, phase):
    """
    Gets the data for training or evaluation. It returns the data in the format that pytorch will process. In the
    data directory, there should be a .csv file with the name <phase>.csv
    Parameters
    ----------
    phase: str
        Name of the dataset that will be used in that phase. For example if there is a 'train.csv' in the data
        folder, it should be set to 'train'.
    Returns
    -------
    examples: list
        A list of InputExample's. Each InputExample is an object that includes the information for each example:
        text, id, label, etc.
    """

    examples = self.processor.get_examples(self.config.data_dir, phase)
    self.num_train_optimization_steps = int(
        len(examples) / self.config.train_batch_size / self.config.gradient_accumulation_steps
    ) * self.config.num_train_epochs

    if phase == 'train':
        train = pd.read_csv(os.path.join(self.config.data_dir, 'train.csv'), sep='\t', index_col=False)
        weights = list()
        labels = self.label_list

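        # self.class_weights is set only in this 'train' branch; train() reads it later, so
        # get_data('train') has to run before train() is called.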
        class_weights = [train.shape[0] / train[train.label == label].shape[0] for label in labels]
        self.class_weights = torch.tensor(class_weights)

    return examples

def create_the_model(self):
    """
    Creates the model. Sets the model to be trained and the optimizer.
    """

    model = self.config.bert_model

    model.to(self.device)

    # Prepare optimizer
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

    lr = self.config.learning_rate
    dft_rate = 1.2

    if self.config.discriminate:
        # apply the discriminative fine-tuning. discrimination rate is governed by dft_rate.

        encoder_params = []
        for i in range(12):
            encoder_decay = {
                'params': [p for n, p in list(model.bert.encoder.layer[i].named_parameters()) if
                           not any(nd in n for nd in no_decay)],
                'weight_decay': 0.01,
                'lr': lr / (dft_rate ** (12 - i))}
            encoder_nodecay = {
                'params': [p for n, p in list(model.bert.encoder.layer[i].named_parameters()) if
                           any(nd in n for nd in no_decay)],
                'weight_decay': 0.0,
                'lr': lr / (dft_rate ** (12 - i))}
            encoder_params.append(encoder_decay)
            encoder_params.append(encoder_nodecay)

        optimizer_grouped_parameters = [
            {'params': [p for n, p in list(model.bert.embeddings.named_parameters()) if
                        not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01,
             'lr': lr / (dft_rate ** 13)},
            {'params': [p for n, p in list(model.bert.embeddings.named_parameters()) if
                        any(nd in n for nd in no_decay)],
             'weight_decay': 0.0,
             'lr': lr / (dft_rate ** 13)},
            {'params': [p for n, p in list(model.bert.pooler.named_parameters()) if
                        not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01,
             'lr': lr},
            {'params': [p for n, p in list(model.bert.pooler.named_parameters()) if
                        any(nd in n for nd in no_decay)],
             'weight_decay': 0.0,
             'lr': lr},
            {'params': [p for n, p in list(model.classifier.named_parameters()) if
                        not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01,
             'lr': lr},
            {'params': [p for n, p in list(model.classifier.named_parameters()) if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0,
             'lr': lr}]

        optimizer_grouped_parameters.extend(encoder_params)


    else:
        param_optimizer = list(model.named_parameters())

        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]

    schedule = "warmup_linear"


    self.num_warmup_steps = int(float(self.num_train_optimization_steps) * self.config.warm_up_proportion)

    self.optimizer = AdamW(optimizer_grouped_parameters,
                      lr=self.config.learning_rate,
                      correct_bias=False)

    self.scheduler = get_linear_schedule_with_warmup(self.optimizer,
                                                num_warmup_steps=self.num_warmup_steps,
                                                num_training_steps=self.num_train_optimization_steps)

    return model

def get_loader(self, examples, phase):
    """
    Creates a data loader object for a dataset.
    Parameters
    ----------
    examples: list
        The list of InputExample's.
    phase: 'train' or 'eval'
        Determines whether to use random sampling or sequential sampling depending on the phase.
    Returns
    -------
    dataloader: DataLoader
        The data loader object.
    """

    features = convert_examples_to_features(examples, self.label_list,
                                            self.config.max_seq_length,
                                            self.tokenizer,
                                            self.config.output_mode)

    # Log the necessary information
    logger.info("***** Loading data *****")
    logger.info("  Num examples = %d", len(examples))
    logger.info("  Batch size = %d", self.config.train_batch_size)
    logger.info("  Num steps = %d", self.num_train_optimization_steps)

    # Load the data, make it into TensorDataset
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
    all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)

    if self.config.output_mode == "classification":
        all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long)
    elif self.config.output_mode == "regression":
        all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.float)

    try:
        all_agree_ids = torch.tensor([f.agree for f in features], dtype=torch.long)
    except:
        all_agree_ids = torch.tensor([0.0 for f in features], dtype=torch.long)

    data = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_label_ids, all_agree_ids)

    # Distributed, if necessary
    if phase == 'train':
        my_sampler = RandomSampler(data)
    elif phase == 'eval':
        my_sampler = SequentialSampler(data)

    dataloader = DataLoader(data, sampler=my_sampler, batch_size=self.config.train_batch_size)
    return dataloader

def train(self, train_examples, model):
    """
    Trains the model.
    Parameters
    ----------
    train_examples: list
        Contains the training data as a list of InputExample's.
    model: BertModel
        The BERT model to be trained.
    Returns
    -------
    model: BertModel
        The trained model.
    """

    validation_examples = self.get_data('/root/autodl-tmp/finBERT-master/notebooks/data/sentiment_data/validation')

    global_step = 0

    self.validation_losses = []

    # Training
    train_dataloader = self.get_loader(train_examples, 'train')

    model.train()

    step_number = len(train_dataloader)

    i = 0
    for _ in trange(int(self.config.num_train_epochs), desc="Epoch"):

        model.train()

        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0

        for step, batch in enumerate(tqdm(train_dataloader, desc='Iteration')):

            if (self.config.gradual_unfreeze and i == 0):
                for param in model.bert.parameters():
                    param.requires_grad = False

            if (step % (step_number // 3)) == 0:
                i += 1

            if (self.config.gradual_unfreeze and i > 1 and i < self.config.encoder_no):

                for k in range(i - 1):

                    try:
                        for param in model.bert.encoder.layer[self.config.encoder_no - 1 - k].parameters():
                            param.requires_grad = True
                    except:
                        pass

            if (self.config.gradual_unfreeze and i > self.config.encoder_no + 1):
                for param in model.bert.embeddings.parameters():
                    param.requires_grad = True

            batch = tuple(t.to(self.device) for t in batch)

            input_ids, attention_mask, token_type_ids, label_ids, agree_ids = batch

            logits = model(input_ids, attention_mask, token_type_ids)[0]
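            # self.class_weights is assigned only in get_data('train'); if train() runs before that call,
            # the next line raises AttributeError: 'FinBert' object has no attribute 'class_weights'.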
            weights = self.class_weights.to(self.device)

            if self.config.output_mode == "classification":
                loss_fct = CrossEntropyLoss(weight=weights)
                loss = loss_fct(logits.view(-1, self.num_labels), label_ids.view(-1))
            elif self.config.output_mode == "regression":
                loss_fct = MSELoss()
                loss = loss_fct(logits.view(-1), label_ids.view(-1))

            if self.config.gradient_accumulation_steps > 1:
                loss = loss / self.config.gradient_accumulation_steps
            loss.backward()

            tr_loss += loss.item()
            nb_tr_examples += input_ids.size(0)
            nb_tr_steps += 1
            if (step + 1) % self.config.gradient_accumulation_steps == 0:
                if self.config.fp16:
                    lr_this_step = self.config.learning_rate * warmup_linear(
                        global_step / self.num_train_optimization_steps, self.config.warm_up_proportion)
                    for param_group in self.optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                self.optimizer.step()
                self.scheduler.step()
                self.optimizer.zero_grad()
                global_step += 1

        # Validation

        validation_loader = self.get_loader(validation_examples, phase='eval')
        model.eval()

        valid_loss, valid_accuracy = 0, 0
        nb_valid_steps, nb_valid_examples = 0, 0

        for input_ids, attention_mask, token_type_ids, label_ids, agree_ids in tqdm(validation_loader, desc="Validating"):
            input_ids = input_ids.to(self.device)
            attention_mask = attention_mask.to(self.device)
            token_type_ids = token_type_ids.to(self.device)
            label_ids = label_ids.to(self.device)
            agree_ids = agree_ids.to(self.device)

            with torch.no_grad():
                logits = model(input_ids, attention_mask, token_type_ids)[0]

                if self.config.output_mode == "classification":
                    loss_fct = CrossEntropyLoss(weight=weights)
                    tmp_valid_loss = loss_fct(logits.view(-1, self.num_labels), label_ids.view(-1))
                elif self.config.output_mode == "regression":
                    loss_fct = MSELoss()
                    tmp_valid_loss = loss_fct(logits.view(-1), label_ids.view(-1))

                valid_loss += tmp_valid_loss.mean().item()

                nb_valid_steps += 1

        valid_loss = valid_loss / nb_valid_steps

        self.validation_losses.append(valid_loss)
        print("Validation losses: {}".format(self.validation_losses))

        if valid_loss == min(self.validation_losses):

            try:
                os.remove(self.config.model_dir / ('temporary' + str(best_model)))
            except:
                print('No best model found')
            torch.save({'epoch': str(i), 'state_dict': model.state_dict()},
                       self.config.model_dir / ('temporary' + str(i)))
            best_model = i

    # Save a trained model and the associated configuration
    checkpoint = torch.load(self.config.model_dir / ('temporary' + str(best_model)))
    model.load_state_dict(checkpoint['state_dict'])
    model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self
    output_model_file = os.path.join(self.config.model_dir, WEIGHTS_NAME)
    torch.save(model_to_save.state_dict(), output_model_file)
    output_config_file = os.path.join(self.config.model_dir, CONFIG_NAME)
    with open(output_config_file, 'w') as f:
        f.write(model_to_save.config.to_json_string())
    os.remove(self.config.model_dir / ('temporary' + str(best_model)))
    return model

def evaluate(self, model, examples):
    """
    Evaluate the model.
    Parameters
    ----------
    model: BertModel
        The model to be evaluated.
    examples: list
        Evaluation data as a list of InputExample's.
    Returns
    -------
    evaluation_df: pd.DataFrame
        A dataframe that includes for each example predicted probability and labels.
    """

    eval_loader = self.get_loader(examples, phase='eval')

    logger.info("***** Running evaluation ***** ")
    logger.info("  Num examples = %d", len(examples))
    logger.info("  Batch size = %d", self.config.eval_batch_size)

    model.eval()
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0

    predictions = []
    labels = []
    agree_levels = []
    text_ids = []

    for input_ids, attention_mask, token_type_ids, label_ids, agree_ids in tqdm(eval_loader, desc="Testing"):
        input_ids = input_ids.to(self.device)
        attention_mask = attention_mask.to(self.device)
        token_type_ids = token_type_ids.to(self.device)
        label_ids = label_ids.to(self.device)
        agree_ids = agree_ids.to(self.device)

        with torch.no_grad():
            logits = model(input_ids, attention_mask, token_type_ids)[0]

            if self.config.output_mode == "classification":
                loss_fct = CrossEntropyLoss()
                tmp_eval_loss = loss_fct(logits.view(-1, self.num_labels), label_ids.view(-1))
            elif self.config.output_mode == "regression":
                loss_fct = MSELoss()
                tmp_eval_loss = loss_fct(logits.view(-1), label_ids.view(-1))

            np_logits = logits.cpu().numpy()

            prediction = np.array(np_logits)

            for agree_id in agree_ids:
                agree_levels.append(agree_id.item())

            for label_id in label_ids:
                labels.append(label_id.item())

            for pred in prediction:
                predictions.append(pred)

            text_ids.append(input_ids)

            # tmp_eval_loss = loss_fct(logits.view(-1, self.num_labels), label_ids.view(-1))
            # tmp_eval_loss = model(input_ids, token_type_ids, attention_mask, label_ids)

            eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

        # logits = logits.detach().cpu().numpy()
        # label_ids = label_ids.to('cpu').numpy()
        # tmp_eval_accuracy = accuracy(logits, label_ids)

        # eval_loss += tmp_eval_loss.mean().item()
        # eval_accuracy += tmp_eval_accuracy

    evaluation_df = pd.DataFrame({'predictions': predictions, 'labels': labels, "agree_levels": agree_levels})

    return evaluation_df

def predict(text, model, write_to_csv=False, path=None, use_gpu=False, gpu_name='cuda:0', batch_size=5):
"""
Predict sentiments of sentences in a given text. The function first tokenizes the sentences, makes
predictions and writes the results.
Parameters
----------
text: str
    The text to be analyzed.
model: BertForSequenceClassification
    The fine-tuned classifier model.
write_to_csv (optional): bool
    Whether to write the results to a csv file.
path (optional): str
    Path of the csv file the results are written to.
use_gpu (optional): bool
    Enables inference on GPU.
gpu_name (optional): str
    Multi-GPU support: allows specifying which GPU to use.
batch_size (optional): int
    Size of the batching chunks.
"""
model.eval()

sentences = sent_tokenize(text)

device = gpu_name if use_gpu and torch.cuda.is_available() else "cpu"
logging.info("Using device: %s " % device)
label_list = ['positive', 'negative', 'neutral']
label_dict = {0: 'positive', 1: 'negative', 2: 'neutral'}
result = pd.DataFrame(columns=['sentence', 'logit', 'prediction', 'sentiment_score'])
for batch in chunks(sentences, batch_size):
    examples = [InputExample(str(i), sentence) for i, sentence in enumerate(batch)]

    features = convert_examples_to_features(examples, label_list, 64, tokenizer)

    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long).to(device)
    all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long).to(device)
    all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long).to(device)

    with torch.no_grad():
        model = model.to(device)

        logits = model(all_input_ids, all_attention_mask, all_token_type_ids)[0]
        logging.info(logits)
        logits = softmax(np.array(logits.cpu()))
        sentiment_score = pd.Series(logits[:, 0] - logits[:, 1])
        predictions = np.squeeze(np.argmax(logits, axis=1))

        batch_result = {'sentence': batch,
                        'logit': list(logits),
                        'prediction': predictions,
                        'sentiment_score': sentiment_score}

        batch_result = pd.DataFrame(batch_result)
        result = pd.concat([result, batch_result], ignore_index=True)

result['prediction'] = result.prediction.apply(lambda x: label_dict[x])
if write_to_csv:
    result.to_csv(path, sep=',', index=False)

return result

How can I fix this problem? Thanks.
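
For reference, in the code above self.class_weights is assigned only inside get_data(), and only when the phase argument is exactly 'train' (the branch that reads train.csv). train() then reads self.class_weights to build the weighted CrossEntropyLoss, so the AttributeError appears whenever train() runs without a prior get_data('train') call, or when get_data() is given a full path so that the phase == 'train' check never matches. Below is a minimal sketch of the intended call order, using the Config and FinBert classes defined above; the model, paths and label list are placeholders rather than values taken from this issue:

from transformers import AutoModelForSequenceClassification

# Placeholder model and paths; adjust them to your own setup.
bertmodel = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

config = Config(data_dir='data/sentiment_data',   # folder that contains train.csv and validation.csv
                bert_model=bertmodel,
                model_dir='models/classifier')

finbert = FinBert(config)
finbert.prepare_model(label_list=['positive', 'negative', 'neutral'])

# get_data must be called with the phase name 'train' (not an absolute path):
# this is the only place where self.class_weights is computed and stored.
train_examples = finbert.get_data('train')

model = finbert.create_the_model()
trained_model = finbert.train(train_examples=train_examples, model=model)

The same reasoning applies to the validation call inside train(): get_data() expects a phase name that is joined with config.data_dir to form <phase>.csv, so passing 'validation' rather than an absolute path keeps that lookup consistent with the docstring of get_data().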
