A股上市公司传智教育(股票代码 003032)旗下技术交流社区北京昌平校区

 找回密码
 加入黑马

QQ登录

只需一步,快速开始

一心研

初级黑马

  • 黑马币:6

  • 帖子:2

  • 精华:0

© 一心研 初级黑马   /  2020-10-18 17:03  /  4902 人查看  /  0 人回复  /   0 人收藏 转载请遵从CC协议 禁止商业使用本文

大家好,我在训练一个一词多义词向量模型的时候,出现了以下问题,试过了很多办法,都搞不定。请大家帮我看看,给我一点建议解决以下,非常感谢!

from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.linear_model import LogisticRegression
import matplotlib
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

def plot_LSA(test_data,test_labels, savepath="PCA_demo.csv", plot=True):
        lsa = TruncatedSVD(n_components=2) # Truncated SVD works on term count/tf-idf matrices as returned by the vectorizers in sklearn.feature_extraction.text. In that context, it is known as latent semantic analysis (LSA).
        lsa.fit(np.array(test_data).reshape(-1,1))
        lsa_scores = lsa.transform(np.array(test_data).reshape(-1,1))
        color_mapper = {label:idx for idx,label in enumerate(set(test_labels))}
        color_column = [color_mapper[label] for label in test_labels]
        print ('colormapper=',color_mapper)
        #print ('colorColumn=',color_column)
        colors = ['blue','green','red']
        if plot:
            plt.scatter(lsa_scores[:,0], lsa_scores[:,1], s=8, alpha=.8, c=test_labels, cmap=matplotlib.colors.ListedColormap(colors))
            red_patch = mpatches.Patch(color='red', label='Negative')
            blue_patch = mpatches.Patch(color='blue', label='Neutral')
            green_patch = mpatches.Patch(color='green', label='Positive')
            plt.legend(handles=[red_patch, green_patch, blue_patch], prop={'size': 30})

fig = plt.figure(figsize=(16, 16))         
plot_LSA(X_train,y_train)
plt.show()

ValueError                                Traceback (most recent call last)
<ipython-input-73-1e3747e86ada> in <module>
     23
     24 fig = plt.figure(figsize=(16, 16))
---> 25 plot_LSA(X_train,y_train)
     26 plt.show()
     27

<ipython-input-73-1e3747e86ada> in plot_LSA(test_data, test_labels, savepath, plot)
      8 def plot_LSA(test_data,test_labels, savepath="PCA_demo.csv", plot=True):
      9         lsa = TruncatedSVD(n_components=2) # Truncated SVD works on term count/tf-idf matrices as returned by the vectorizers in sklearn.feature_extraction.text. In that context, it is known as latent semantic analysis (LSA).
---> 10         lsa.fit(np.array(test_data).reshape(-1,1))
     11         lsa_scores = lsa.transform(np.array(test_data).reshape(-1,1))
     12         color_mapper = {label:idx for idx,label in enumerate(set(test_labels))}

~\Anaconda3\lib\site-packages\sklearn\decomposition\truncated_svd.py in fit(self, X, y)
    139             Returns the transformer object.
    140         """
--> 141         self.fit_transform(X)
    142         return self
    143

~\Anaconda3\lib\site-packages\sklearn\decomposition\truncated_svd.py in fit_transform(self, X, y)
    158         """
    159         X = check_array(X, accept_sparse=['csr', 'csc'],
--> 160                         ensure_min_features=2)
    161         random_state = check_random_state(self.random_state)
    162

~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    556                              " a minimum of %d is required%s."
    557                              % (n_features, array.shape, ensure_min_features,
--> 558                                 context))
    559
    560     if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig:

ValueError: Found array with 1 feature(s) (shape=(128764, 1)) while a minimum of 2 is required.

<Figure size 1152x1152 with 0 Axes>


0 个回复

您需要登录后才可以回帖 登录 | 加入黑马