大家好,我在训练一个一词多义词向量模型的时候,出现了以下问题,试过了很多办法,都搞不定。请大家帮我看看,给我一点建议解决以下,非常感谢!
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.linear_model import LogisticRegression
import matplotlib
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
def plot_LSA(test_data,test_labels, savepath="PCA_demo.csv", plot=True):
lsa = TruncatedSVD(n_components=2) # Truncated SVD works on term count/tf-idf matrices as returned by the vectorizers in sklearn.feature_extraction.text. In that context, it is known as latent semantic analysis (LSA).
lsa.fit(np.array(test_data).reshape(-1,1))
lsa_scores = lsa.transform(np.array(test_data).reshape(-1,1))
color_mapper = {label:idx for idx,label in enumerate(set(test_labels))}
color_column = [color_mapper[label] for label in test_labels]
print ('colormapper=',color_mapper)
#print ('colorColumn=',color_column)
colors = ['blue','green','red']
if plot:
plt.scatter(lsa_scores[:,0], lsa_scores[:,1], s=8, alpha=.8, c=test_labels, cmap=matplotlib.colors.ListedColormap(colors))
red_patch = mpatches.Patch(color='red', label='Negative')
blue_patch = mpatches.Patch(color='blue', label='Neutral')
green_patch = mpatches.Patch(color='green', label='Positive')
plt.legend(handles=[red_patch, green_patch, blue_patch], prop={'size': 30})
fig = plt.figure(figsize=(16, 16))
plot_LSA(X_train,y_train)
plt.show()
ValueError Traceback (most recent call last)
<ipython-input-73-1e3747e86ada> in <module>
23
24 fig = plt.figure(figsize=(16, 16))
---> 25 plot_LSA(X_train,y_train)
26 plt.show()
27
<ipython-input-73-1e3747e86ada> in plot_LSA(test_data, test_labels, savepath, plot)
8 def plot_LSA(test_data,test_labels, savepath="PCA_demo.csv", plot=True):
9 lsa = TruncatedSVD(n_components=2) # Truncated SVD works on term count/tf-idf matrices as returned by the vectorizers in sklearn.feature_extraction.text. In that context, it is known as latent semantic analysis (LSA).
---> 10 lsa.fit(np.array(test_data).reshape(-1,1))
11 lsa_scores = lsa.transform(np.array(test_data).reshape(-1,1))
12 color_mapper = {label:idx for idx,label in enumerate(set(test_labels))}
~\Anaconda3\lib\site-packages\sklearn\decomposition\truncated_svd.py in fit(self, X, y)
139 Returns the transformer object.
140 """
--> 141 self.fit_transform(X)
142 return self
143
~\Anaconda3\lib\site-packages\sklearn\decomposition\truncated_svd.py in fit_transform(self, X, y)
158 """
159 X = check_array(X, accept_sparse=['csr', 'csc'],
--> 160 ensure_min_features=2)
161 random_state = check_random_state(self.random_state)
162
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
556 " a minimum of %d is required%s."
557 % (n_features, array.shape, ensure_min_features,
--> 558 context))
559
560 if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig:
ValueError: Found array with 1 feature(s) (shape=(128764, 1)) while a minimum of 2 is required.
<Figure size 1152x1152 with 0 Axes>
|
|