## Licensed to the Apache Software Foundation (ASF) under one or more# contributor license agreements. See the NOTICE file distributed with# this work for additional information regarding copyright ownership.# The ASF licenses this file to You under the Apache License, Version 2.0# (the "License"); you may not use this file except in compliance with# the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License.#importarrayfromabcimportABCMetaimportcopyfromtypingimport(Any,Callable,Generic,List,Optional,overload,TypeVar,Union,TYPE_CHECKING,)importnumpyasnpfrompyspark.utilimportis_remote_onlyfrompyspark.ml.linalgimportDenseVector,Vector,Matrixfrompyspark.ml.utilimportIdentifiableifTYPE_CHECKING:frompyspark.ml._typingimportParamMap__all__=["Param","Params","TypeConverters"]T=TypeVar("T")P=TypeVar("P",bound="Params")
class Param(Generic[T]):
    """
    A named, documented parameter that is bound to an owner by the owner's uid.

    Two params compare equal (and hash equally) when both their parent uid
    and their name match.

    .. versionadded:: 1.3.0
    """

    def __init__(
        self,
        parent: Identifiable,
        name: str,
        doc: str,
        typeConverter: Optional[Callable[[Any], T]] = None,
    ):
        # Reject anything that is not an Identifiable before touching ``uid``.
        if not isinstance(parent, Identifiable):
            raise TypeError("Parent must be an Identifiable but got type %s." % type(parent))

        # Only the uid of the owner is stored, not the owner object itself.
        self.parent = parent.uid
        self.name = str(name)
        self.doc = str(doc)

        # With no converter supplied, values are passed through unchanged.
        if typeConverter is None:
            typeConverter = TypeConverters.identity
        self.typeConverter = typeConverter

    def _copy_new_parent(self, parent: Any) -> "Param":
        """
        Return a shallow copy of this param re-bound to ``parent.uid``.

        Only a dummy param (one whose parent uid is ``"undefined"``) may be
        copied this way; otherwise a :py:class:`ValueError` is raised.
        """
        if self.parent != "undefined":
            raise ValueError("Cannot copy from non-dummy parent %s." % parent)

        clone = copy.copy(self)
        clone.parent = parent.uid
        return clone

    def __str__(self) -> str:
        # "<parent uid>__<name>"; this string is also the basis of __hash__.
        owner = str(self.parent)
        return owner + "__" + self.name

    def __repr__(self) -> str:
        fields = (self.parent, self.name, self.doc)
        return "Param(parent=%r, name=%r, doc=%r)" % fields

    def __hash__(self) -> int:
        return hash(str(self))

    def __eq__(self, other: Any) -> bool:
        # Anything that is not a Param is never equal.
        if not isinstance(other, Param):
            return False
        return self.parent == other.parent and self.name == other.name
class TypeConverters:
    """
    Factory methods for common type conversion functions for `Param.typeConverter`.

    Every public converter either returns the converted value or raises
    :py:class:`TypeError`; none of them coerce silently.

    .. versionadded:: 2.0.0
    """

    @staticmethod
    def _is_numeric(value: Any) -> bool:
        """
        Return True if the exact type of `value` is one of the accepted numeric types.

        The check is on ``type(value)`` rather than ``isinstance``, so
        subclasses (for example ``bool``) are not treated as numeric.
        """
        vtype = type(value)
        return vtype in [int, float, np.float64, np.int64] or vtype.__name__ == "long"

    @staticmethod
    def _is_integer(value: Any) -> bool:
        """
        Return True if `value` is numeric and has no fractional part (e.g. ``2`` or ``2.0``).
        """
        return TypeConverters._is_numeric(value) and float(value).is_integer()

    @staticmethod
    def _can_convert_to_list(value: Any) -> bool:
        """
        Return True if `value` is a list-like type accepted by :py:meth:`toList`.
        """
        vtype = type(value)
        return vtype in [list, np.ndarray, tuple, range, array.array] or isinstance(value, Vector)

    @staticmethod
    def _can_convert_to_string(value: Any) -> bool:
        """
        Return True if `value` is a ``str`` or a NumPy string/bytes scalar.
        """
        vtype = type(value)
        # np.bytes_ / np.str_ are the canonical names of the former
        # np.string_ / np.unicode_ aliases, which NumPy 2.0 removed.
        return isinstance(value, str) or vtype in [np.bytes_, np.str_]

    @staticmethod
    def identity(value: "T") -> "T":
        """
        Dummy converter that just returns value.
        """
        return value

    @staticmethod
    def toList(value: Any) -> List:
        """
        Convert a value to a list, if possible.

        A plain ``list`` is returned as-is (not copied); ndarray, tuple, range
        and ``array.array`` values are materialised with ``list()``; a
        ``Vector`` is converted through ``toArray()``.

        Raises
        ------
        TypeError
            if the value is none of the supported types
        """
        if type(value) is list:
            return value
        elif type(value) in [np.ndarray, tuple, range, array.array]:
            return list(value)
        elif isinstance(value, Vector):
            return list(value.toArray())
        else:
            raise TypeError("Could not convert %s to list" % value)

    @staticmethod
    def toListFloat(value: Any) -> List[float]:
        """
        Convert a value to list of floats, if possible.

        Raises
        ------
        TypeError
            if the value is not list-like or contains a non-numeric element
        """
        if TypeConverters._can_convert_to_list(value):
            value = TypeConverters.toList(value)
            if all(TypeConverters._is_numeric(v) for v in value):
                return [float(v) for v in value]
        raise TypeError("Could not convert %s to list of floats" % value)

    @staticmethod
    def toListListFloat(value: Any) -> List[List[float]]:
        """
        Convert a value to list of list of floats, if possible.

        Each element is converted with :py:meth:`toListFloat`, so a bad inner
        element raises that method's :py:class:`TypeError`.
        """
        if TypeConverters._can_convert_to_list(value):
            value = TypeConverters.toList(value)
            return [TypeConverters.toListFloat(v) for v in value]
        raise TypeError("Could not convert %s to list of list of floats" % value)

    @staticmethod
    def toListInt(value: Any) -> List[int]:
        """
        Convert a value to list of ints, if possible.

        Integral floats such as ``2.0`` are accepted and converted to ``int``.

        Raises
        ------
        TypeError
            if the value is not list-like or contains a non-integral element
        """
        if TypeConverters._can_convert_to_list(value):
            value = TypeConverters.toList(value)
            if all(TypeConverters._is_integer(v) for v in value):
                return [int(v) for v in value]
        raise TypeError("Could not convert %s to list of ints" % value)

    @staticmethod
    def toListString(value: Any) -> List[str]:
        """
        Convert a value to list of strings, if possible.

        Raises
        ------
        TypeError
            if the value is not list-like or contains a non-string element
        """
        if TypeConverters._can_convert_to_list(value):
            value = TypeConverters.toList(value)
            if all(TypeConverters._can_convert_to_string(v) for v in value):
                return [TypeConverters.toString(v) for v in value]
        raise TypeError("Could not convert %s to list of strings" % value)

    @staticmethod
    def toVector(value: Any) -> Vector:
        """
        Convert a value to a MLlib Vector, if possible.

        An existing ``Vector`` is returned unchanged; a list-like of numeric
        values is wrapped in a ``DenseVector``.

        Raises
        ------
        TypeError
            if the value is neither a Vector nor a list-like of numerics
        """
        if isinstance(value, Vector):
            return value
        elif TypeConverters._can_convert_to_list(value):
            value = TypeConverters.toList(value)
            if all(TypeConverters._is_numeric(v) for v in value):
                return DenseVector(value)
        raise TypeError("Could not convert %s to vector" % value)

    @staticmethod
    def toMatrix(value: Any) -> Matrix:
        """
        Convert a value to a MLlib Matrix, if possible.

        Only values that already are a ``Matrix`` are accepted.
        """
        if isinstance(value, Matrix):
            return value
        raise TypeError("Could not convert %s to matrix" % value)

    @staticmethod
    def toFloat(value: Any) -> float:
        """
        Convert a value to a float, if possible.
        """
        if TypeConverters._is_numeric(value):
            return float(value)
        else:
            raise TypeError("Could not convert %s to float" % value)

    @staticmethod
    def toInt(value: Any) -> int:
        """
        Convert a value to an int, if possible.

        Integral floats such as ``4.0`` are accepted; ``4.5`` is rejected.
        """
        if TypeConverters._is_integer(value):
            return int(value)
        else:
            raise TypeError("Could not convert %s to int" % value)

    @staticmethod
    def toString(value: Any) -> str:
        """
        Convert a value to a string, if possible.

        Raises
        ------
        TypeError
            if the value is not a ``str`` or a NumPy string/bytes scalar
        """
        if isinstance(value, str):
            return value
        # np.bytes_ / np.str_ replace the np.string_ / np.unicode_ aliases
        # that no longer exist in NumPy 2.0 (same types, canonical names).
        elif type(value) in [np.bytes_, np.str_]:
            return str(value)
        else:
            raise TypeError("Could not convert %s to string type" % type(value))

    @staticmethod
    def toBoolean(value: Any) -> bool:
        """
        Convert a value to a boolean, if possible.

        Only exact ``bool`` values are accepted; truthy/falsy values of other
        types are rejected.
        """
        if type(value) is bool:
            return value
        else:
            raise TypeError("Boolean Param requires value of type bool. Found %s." % type(value))
class Params(Identifiable, metaclass=ABCMeta):
    """
    Components that take parameters. This also provides an internal
    param map to store parameter values attached to the instance.

    Two maps are kept per instance: one for user-supplied values and one for
    defaults. A user-supplied value takes precedence over a default.

    .. versionadded:: 1.3.0
    """

    def __init__(self) -> None:
        super(Params, self).__init__()
        #: internal param map for user-supplied values param map
        self._paramMap: "ParamMap" = {}

        #: internal param map for default values
        self._defaultParamMap: "ParamMap" = {}

        #: value returned by :py:func:`params`
        self._params: Optional[List[Param]] = None

        # Copy the params from the class to the object
        self._copy_params()

    def _copy_params(self) -> None:
        """
        Copy all params defined on the class to current object.

        Each class-level :py:class:`Param` is re-bound to this instance via
        ``Param._copy_new_parent`` and stored as an instance attribute of the
        same name.
        """
        cls = type(self)
        src_name_attrs = [(x, getattr(cls, x)) for x in dir(cls)]
        src_params = [
            (name, attr) for name, attr in src_name_attrs if isinstance(attr, Param)
        ]
        for name, param in src_params:
            setattr(self, name, param._copy_new_parent(self))

    @property
    def params(self) -> List[Param]:
        """
        Returns all params ordered by name. The default implementation
        uses :py:func:`dir` to get all attributes of type
        :py:class:`Param`.

        The list is computed once and cached on the instance.
        """
        if self._params is None:
            # Skip "params" itself and every other property: evaluating a
            # property through getattr would execute its getter.
            self._params = [
                attr
                for attr in (
                    getattr(self, x)
                    for x in dir(self)
                    if x != "params" and not isinstance(getattr(type(self), x, None), property)
                )
                if isinstance(attr, Param)
            ]
        return self._params

    def explainParam(self, param: Union[str, Param]) -> str:
        """
        Explains a single param and returns its name, doc, and optional
        default value and user-supplied value in a string.

        The result has the form ``"<name>: <doc> (<values>)"`` where
        ``<values>`` lists the default and/or current value, or ``undefined``.
        """
        param = self._resolveParam(param)
        values = []
        if self.isDefined(param):
            if param in self._defaultParamMap:
                values.append("default: %s" % self._defaultParamMap[param])
            if param in self._paramMap:
                values.append("current: %s" % self._paramMap[param])
        else:
            values.append("undefined")
        valueStr = "(" + ", ".join(values) + ")"
        # Keep a space between the doc text and the parenthesised values so
        # the two do not run together.
        return "%s: %s %s" % (param.name, param.doc, valueStr)

    def explainParams(self) -> str:
        """
        Returns the documentation of all params with their optionally
        default values and user-supplied values.

        One line per param, in the order given by :py:attr:`params`.
        """
        return "\n".join([self.explainParam(param) for param in self.params])

    def getParam(self, paramName: str) -> Param:
        """
        Gets a param by its name.

        Raises
        ------
        AttributeError
            if this instance has no attribute called `paramName`
        ValueError
            if the attribute exists but is not a :py:class:`Param`
        """
        param = getattr(self, paramName)
        if isinstance(param, Param):
            return param
        else:
            raise ValueError("Cannot find param with name %s." % paramName)

    def isSet(self, param: Union[str, Param[Any]]) -> bool:
        """
        Checks whether a param is explicitly set by user.
        """
        param = self._resolveParam(param)
        return param in self._paramMap

    def hasDefault(self, param: Union[str, Param[Any]]) -> bool:
        """
        Checks whether a param has a default value.
        """
        param = self._resolveParam(param)
        return param in self._defaultParamMap

    def isDefined(self, param: Union[str, Param[Any]]) -> bool:
        """
        Checks whether a param is explicitly set by user or has
        a default value.
        """
        return self.isSet(param) or self.hasDefault(param)

    def hasParam(self, paramName: str) -> bool:
        """
        Tests whether this instance contains a param with a given
        (string) name.

        Raises
        ------
        TypeError
            if `paramName` is not a string
        """
        if isinstance(paramName, str):
            p = getattr(self, paramName, None)
            return isinstance(p, Param)
        else:
            raise TypeError("hasParam(): paramName must be a string")

    def getOrDefault(self, param: Union[str, Param[T]]) -> Union[Any, T]:
        """
        Gets the value of a param in the user-supplied param map or its
        default value. Raises an error if neither is set.

        Raises
        ------
        KeyError
            if the param has neither a user-supplied nor a default value
        """
        param = self._resolveParam(param)
        if param in self._paramMap:
            return self._paramMap[param]
        else:
            return self._defaultParamMap[param]

    def extractParamMap(self, extra: Optional["ParamMap"] = None) -> "ParamMap":
        """
        Extracts the embedded default param values and user-supplied
        values, and then merges them with extra values from input into
        a flat param map, where the latter value is used if there exist
        conflicts, i.e., with ordering: default param values <
        user-supplied values < extra.

        Parameters
        ----------
        extra : dict, optional
            extra param values

        Returns
        -------
        dict
            merged param map
        """
        if extra is None:
            extra = dict()
        # Later updates win: defaults, then user-supplied values, then extra.
        paramMap = self._defaultParamMap.copy()
        paramMap.update(self._paramMap)
        paramMap.update(extra)
        return paramMap

    def copy(self: P, extra: Optional["ParamMap"] = None) -> P:
        """
        Creates a copy of this instance with the same uid and some
        extra params. The default implementation creates a
        shallow copy using :py:func:`copy.copy`, and then copies the
        embedded and extra parameters over and returns the copy.
        Subclasses should override this method if the default approach
        is not sufficient.

        Parameters
        ----------
        extra : dict, optional
            Extra parameters to copy to the new instance

        Returns
        -------
        :py:class:`Params`
            Copy of this instance
        """
        if extra is None:
            extra = dict()
        that = copy.copy(self)
        # The shallow copy shares the map objects with ``self``; give the
        # copy fresh maps before values are copied over.
        that._paramMap = {}
        that._defaultParamMap = {}
        return self._copyValues(that, extra)

    def set(self, param: Param, value: Any) -> None:
        """
        Sets a parameter in the embedded param map.

        The value is passed through ``param.typeConverter`` first.

        Raises
        ------
        ValueError
            if `param` does not belong to this instance, or the converter
            raised :py:class:`ValueError`
        TypeError
            if the converter rejected the value with :py:class:`TypeError`
        """
        self._shouldOwn(param)
        try:
            value = param.typeConverter(value)
        except TypeError as e:
            # The converters in this module signal bad input with TypeError;
            # attach the param name the same way ``_set`` does.
            raise TypeError(
                'Invalid param value given for param "%s". %s' % (param.name, e)
            ) from e
        except ValueError as e:
            raise ValueError(
                'Invalid param value given for param "%s". %s' % (param.name, e)
            ) from e
        self._paramMap[param] = value

    def _shouldOwn(self, param: Param) -> None:
        """
        Validates that the input param belongs to this Params instance.

        Raises
        ------
        ValueError
            if the param's parent uid differs from this instance's uid or
            this instance has no param of that name
        """
        if not (self.uid == param.parent and self.hasParam(param.name)):
            raise ValueError("Param %r does not belong to %r." % (param, self))

    def _resolveParam(self, param: Union[str, Param]) -> Param:
        """
        Resolves a param and validates the ownership.

        Parameters
        ----------
        param : str or :py:class:`Param`
            param name or the param instance, which must
            belong to this Params instance

        Returns
        -------
        :py:class:`Param`
            resolved param instance

        Raises
        ------
        TypeError
            if `param` is neither a string nor a :py:class:`Param`
        """
        if isinstance(param, Param):
            self._shouldOwn(param)
            return param
        elif isinstance(param, str):
            return self.getParam(param)
        else:
            raise TypeError("Cannot resolve %r as a param." % param)

    def _testOwnParam(self, param_parent: str, param_name: str) -> bool:
        """
        Test the ownership. Return True or False
        """
        return self.uid == param_parent and self.hasParam(param_name)

    @staticmethod
    def _dummy() -> "Params":
        """
        Returns a dummy Params instance used as a placeholder to
        generate docs.

        Its uid is ``"undefined"``.
        """
        dummy = Params()
        dummy.uid = "undefined"
        return dummy

    def _set(self: P, **kwargs: Any) -> P:
        """
        Sets user-supplied params.

        Each keyword is the name of a param attribute on this instance.
        ``None`` values are stored as-is without going through the param's
        type converter.

        Raises
        ------
        TypeError
            if a converter rejects a value
        """
        for param, value in kwargs.items():
            p = getattr(self, param)
            if value is not None:
                try:
                    value = p.typeConverter(value)
                except TypeError as e:
                    raise TypeError('Invalid param value given for param "%s". %s' % (p.name, e))
            self._paramMap[p] = value
        return self

    def clear(self, param: Param) -> None:
        """
        Clears a param from the param map if it has been explicitly set.
        """
        if self.isSet(param):
            del self._paramMap[param]

    def _setDefault(self: P, **kwargs: Any) -> P:
        """
        Sets default params.

        ``None`` values and, when not running remote-only, py4j ``JavaObject``
        values are stored as-is without going through the type converter.

        Raises
        ------
        TypeError
            if a converter rejects a value
        """
        # Loop-invariant; also decides whether py4j may be imported at all.
        remote_only = is_remote_only()
        if not remote_only:
            from py4j.java_gateway import JavaObject

        for param, value in kwargs.items():
            p = getattr(self, param)
            if value is not None and (remote_only or not isinstance(value, JavaObject)):
                try:
                    value = p.typeConverter(value)
                except TypeError as e:
                    raise TypeError(
                        'Invalid default param value given for param "%s". %s' % (p.name, e)
                    )
            self._defaultParamMap[p] = value
        return self

    def _copyValues(self, to: P, extra: Optional["ParamMap"] = None) -> P:
        """
        Copies param values from this instance to another instance for
        params shared by them.

        Parameters
        ----------
        to : :py:class:`Params`
            the target instance
        extra : dict, optional
            extra params to be copied

        Returns
        -------
        :py:class:`Params`
            the target instance with param values copied

        Raises
        ------
        TypeError
            if `extra` is not a dict, or one of its keys is not a
            :py:class:`Param`
        """
        paramMap = self._paramMap.copy()
        if isinstance(extra, dict):
            for param, value in extra.items():
                if isinstance(param, Param):
                    paramMap[param] = value
                else:
                    raise TypeError(
                        "Expecting a valid instance of Param, but received: {}".format(param)
                    )
        elif extra is not None:
            raise TypeError(
                "Expecting a dict, but received an object of type {}.".format(type(extra))
            )
        for param in self.params:
            # copy default params
            if param in self._defaultParamMap and to.hasParam(param.name):
                to._defaultParamMap[to.getParam(param.name)] = self._defaultParamMap[param]
            # copy explicitly set params
            if param in paramMap and to.hasParam(param.name):
                to._set(**{param.name: paramMap[param]})
        return to

    def _resetUid(self: P, newUid: Any) -> P:
        """
        Changes the uid of this instance. This updates both
        the stored uid and the parent uid of params and param maps.
        This is used by persistence (loading).

        Parameters
        ----------
        newUid
            new uid to use, which is converted to unicode

        Returns
        -------
        :py:class:`Params`
            same instance, but with the uid and Param.parent values
            updated, including within param maps
        """
        newUid = str(newUid)
        self.uid = newUid
        newDefaultParamMap = dict()
        newParamMap = dict()
        for param in self.params:
            # Look values up under the old key first: a Param's hash depends
            # on its parent uid, so ``param.parent`` may only be mutated
            # after both lookups are done.
            newParam = copy.copy(param)
            newParam.parent = newUid
            if param in self._defaultParamMap:
                newDefaultParamMap[newParam] = self._defaultParamMap[param]
            if param in self._paramMap:
                newParamMap[newParam] = self._paramMap[param]
            param.parent = newUid
        self._defaultParamMap = newDefaultParamMap
        self._paramMap = newParamMap
        return self