[论坛提问] 如何debug以下这个问题

大家好，我在训练一个一词多义词向量模型的时候，出现了以下问题，试过了很多办法，都搞不定。请大家帮我看看，给我一点建议解决以下，非常感谢！

from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.linear_model import LogisticRegression
import matplotlib
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

def plot_LSA(test_data,test_labels, savepath="PCA_demo.csv", plot=True):
      lsa = TruncatedSVD(n_components=2) # Truncated SVD works on term count/tf-idf matrices as returned by the vectorizers in sklearn.feature_extraction.text. In that context, it is known as latent semantic analysis (LSA).
      lsa.fit(np.array(test_data).reshape(-1,1))
      lsa_scores = lsa.transform(np.array(test_data).reshape(-1,1))
      color_mapper = {label:idx for idx,label in enumerate(set(test_labels))}
      color_column = [color_mapper[label] for label in test_labels]
      print ('colormapper=',color_mapper)
      #print ('colorColumn=',color_column)
      colors = ['blue','green','red']
      if plot:
         plt.scatter(lsa_scores[:,0], lsa_scores[:,1], s=8, alpha=.8, c=test_labels, cmap=matplotlib.colors.ListedColormap(colors))
         red_patch = mpatches.Patch(color='red', label='Negative')
         blue_patch = mpatches.Patch(color='blue', label='Neutral')
         green_patch = mpatches.Patch(color='green', label='Positive')
         plt.legend(handles=[red_patch, green_patch, blue_patch], prop={'size': 30})

fig = plt.figure(figsize=(16, 16))
plot_LSA(X_train,y_train)
plt.show()

ValueError                               Traceback (most recent call last)
<ipython-input-73-1e3747e86ada> in <module>
   23
   24 fig = plt.figure(figsize=(16, 16))
---> 25 plot_LSA(X_train,y_train)
   26 plt.show()
   27

<ipython-input-73-1e3747e86ada> in plot_LSA(test_data, test_labels, savepath, plot)
   8 def plot_LSA(test_data,test_labels, savepath="PCA_demo.csv", plot=True):
   9       lsa = TruncatedSVD(n_components=2) # Truncated SVD works on term count/tf-idf matrices as returned by the vectorizers in sklearn.feature_extraction.text. In that context, it is known as latent semantic analysis (LSA).
---> 10       lsa.fit(np.array(test_data).reshape(-1,1))
   11       lsa_scores = lsa.transform(np.array(test_data).reshape(-1,1))
   12       color_mapper = {label:idx for idx,label in enumerate(set(test_labels))}

~\Anaconda3\lib\site-packages\sklearn\decomposition\truncated_svd.py in fit(self, X, y)
139          Returns the transformer object.
140       """
--> 141       self.fit_transform(X)
142       return self
143

~\Anaconda3\lib\site-packages\sklearn\decomposition\truncated_svd.py in fit_transform(self, X, y)
158       """
159       X = check_array(X, accept_sparse=['csr', 'csc'],
--> 160                      ensure_min_features=2)
161       random_state = check_random_state(self.random_state)
162

~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
556                            " a minimum of %d is required%s."
557                            % (n_features, array.shape, ensure_min_features,
--> 558                               context))
559
560    if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig:

ValueError: Found array with 1 feature(s) (shape=(128764, 1)) while a minimum of 2 is required.

<Figure size 1152x1152 with 0 Axes>

帐号		自动登录	找回密码
密码			加入黑马

[论坛提问] 如何debug以下这个问题

0 个回复